19
|
1 #!/usr/bin/perl -w
|
|
2 #==========================================================================================
|
|
3 # Date:
|
|
4 # Title:
|
|
5 # Comment: Program to plot gene structure
|
|
6 # Input: 1.
|
|
7 # 2.
|
|
8 # 3.
|
|
9 # Output: output file of gene structure graph by html or svg formt
|
|
10 # Test Usage:
|
|
11 #========================================================================================
|
|
12 #use strict;
|
|
13 my $version=1.00;
|
|
14 use SVG;
|
|
15 use Getopt::Long;
|
|
16 my %opt;
|
|
17 GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h");
|
|
18 if (!( defined $opt{o}) || defined $opt{h}) {
|
|
19 &usage;
|
|
20 }
|
|
21 my $span=$opt{span};
|
|
22 #my $sample_cloumn=$opt{n};
|
|
23 my $mark=$opt{mark};
|
|
24 my @mark=split/\#/,$mark;
|
|
25 my $genelist=$opt{g};
|
|
26 #===============================Define Attribute==========================================
|
|
27 my %attribute=(
|
|
28 canvas=>{
|
|
29 'width'=>1500,
|
|
30 'height'=>1800
|
|
31 },
|
|
32 text=>{
|
|
33 'stroke'=>"#000000",
|
|
34 'fill'=>"none",
|
|
35 'stroke-width'=>0.5
|
|
36 },
|
|
37 line=>{
|
|
38 'stroke'=>"black",
|
|
39 'stroke-width'=>1
|
|
40 },
|
|
41 csv=>{
|
|
42 'stroke'=>"red",
|
|
43 'stroke-width'=>0.5
|
|
44 },
|
|
45 exon=>{
|
|
46 'stroke'=>"black",
|
|
47 'stroke-width'=>1
|
|
48 },
|
|
49 intron=>{
|
|
50 'stroke'=>"black",
|
|
51 'stroke-width'=>1.5
|
|
52 },
|
|
53 font=>{
|
|
54 'fill'=>"#000000",
|
|
55 'font-size'=>12,
|
|
56 'font-size2'=>10,
|
|
57 #'font-weight'=>'bold',
|
|
58 'font-family'=>"Arial"
|
|
59 #'font-family'=>"ArialNarrow-bold"
|
|
60 },
|
|
61 rect=>{
|
|
62 'fill'=>"lightgreen",
|
|
63 'stroke'=>"black",
|
|
64 'stroke-width'=>0.5
|
|
65 },
|
|
66 readwidth=>0.5
|
|
67 );
|
|
68 #############################s#define start coordinate and scale
|
|
69 open(TXT,">$opt{out}");
|
|
70 open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
|
|
71 my %length;
|
|
72 while (my $aline=<LENGTH>) {
|
|
73 chomp $aline;
|
|
74 next if($aline=~/^\#/);
|
|
75 my @temp=split/\t/,$aline;
|
|
76 $temp[0]=~s/^c/C/;
|
|
77 $length{$temp[0]}=$temp[1];
|
|
78 }
|
|
79 close LENGTH;
|
|
80 #---------------------------------------------------------------
|
|
81 open(GENE,"$opt{g}")||die"cannot open the file $opt{g}";
|
|
82 my %genelist;
|
|
83 while (my $aline=<GENE>) {
|
|
84 chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 +
|
|
85 next if($aline=~/^\#/);
|
|
86 my @temp=split/\t/,$aline;
|
|
87 if ($temp[1]=~/^Chr(\d)$/) {
|
|
88 $temp[1]="Chr0$1";
|
|
89 }
|
|
90 push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]];
|
|
91
|
|
92 }
|
|
93 close GENE;
|
|
94 #my %have_gene;
|
|
95 #foreach my $chr (sort keys %genelist) {
|
|
96 # my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
|
|
97 # my $start=$genelist[0][1];
|
|
98 # my $end=$genelist[0][2];
|
|
99 # for (my $i=0;$i<@genelist ;$i++) {
|
|
100 # if ($gene) {
|
|
101 # }
|
|
102 # }
|
|
103 #}
|
|
104
|
|
105 my %gene_desity;
|
|
106 foreach my $chr (sort keys %genelist) {
|
|
107 my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
|
|
108 for (my $i=0;$i<@genelist ;$i++) {
|
|
109 my $start=int($genelist[$i][1]/$span);
|
|
110 my $end=int($genelist[$i][2]/$span);
|
|
111 #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]};
|
|
112 if ($start==$end) {
|
|
113 $gene_desity{$chr}[$start]++;
|
|
114 }
|
|
115 else{
|
|
116 for (my $k=$start;$k<=$end ;$k++) {
|
|
117 $gene_desity{$chr}[$k]++;
|
|
118 }
|
|
119 }
|
|
120 }
|
|
121 }
|
|
122 #------------------------------------------region_gene_number-------------------------
|
|
123 my $max_gene_number=0;
|
|
124 my $total=0;
|
|
125 foreach my $chr (sort keys %genelist) {
|
|
126 for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) {
|
|
127 if (!(defined($gene_desity{$chr}[$i]))) {
|
|
128 $gene_desity{$chr}[$i]=0;
|
|
129 }
|
|
130 if ($gene_desity{$chr}[$i]>$max_gene_number) {
|
|
131 $max_gene_number=$gene_desity{$chr}[$i];
|
|
132 #print "$gene_desity{$chr}[$i]\n";
|
|
133 }
|
|
134 #print TXT "$i\t$gene_desity[$i]\n";
|
|
135 $total+=$gene_desity{$chr}[$i];
|
|
136 #print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
|
|
137 }
|
|
138 }
|
|
139 #print "Gene max:$max_gene_number\ntotal:$total\n";
|
|
140
|
|
141 #---------------------------------------------------------------
|
|
142 my %centromere;
|
|
143 if (defined($opt{cen})) {
|
|
144 open CEN,"$opt{cen}";
|
|
145 while (my $aline=<CEN>) {
|
|
146 chomp $aline;
|
|
147 next if($aline=~/^\#/);
|
|
148 my @temp=split/\t/,$aline;
|
|
149 $temp[0]=~s/^c/C/;
|
|
150 $centromere{$temp[0]}[0]=$temp[1];
|
|
151 $centromere{$temp[0]}[1]=$temp[2];
|
|
152 }
|
|
153 close CEN;
|
|
154 }
|
|
155
|
|
156 #---------------------------------------------------------------
|
|
157 my $max_length=0;
|
|
158 foreach my $chr (keys %length) {
|
|
159 if ($max_length<$length{$chr}) {
|
|
160 $max_length=$length{$chr};
|
|
161 }
|
|
162 print "$chr\n";
|
|
163 }
|
|
164 #====================================cluster data=======================================
|
|
165 open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
|
|
166 my %cluster;
|
|
167 my %cluster_density;
|
|
168 #my @sample=qw(39B3 3PA3 3LC3);
|
|
169 my @cluster_non_add;
|
|
170 while (my $aline=<CLUSTER>) {
|
|
171 next if($aline=~/^\#/);
|
|
172 chomp $aline;##Chr MajorLength Percent end 19B1
|
|
173 my @temp=split/\t/,$aline;
|
|
174 my @ID=split/\:/,$temp[0];
|
|
175 my @posi=split/\-/,$ID[1];
|
|
176 my @all_rpkm=@temp;
|
|
177 shift @all_rpkm;
|
|
178 shift @all_rpkm;
|
|
179 shift @all_rpkm;
|
|
180 # for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer
|
|
181 # $all_rpkm[$s]=log2($all_rpkm[$s]);
|
|
182 # }
|
|
183 push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1);
|
|
184 }
|
|
185 close CLUSTER;
|
|
186 my %max_cluster;
|
|
187 my $chr_number=0;
|
|
188 print "@mark\n$mark\n";
|
|
189 foreach my $chr (sort keys %cluster) {
|
|
190 for (my $i=0;$i<@mark ;$i++) {
|
|
191 $max_cluster{$chr}[$i]=0;
|
|
192 }
|
|
193 $chr_number++;
|
|
194 }
|
|
195 foreach my $chr (sort keys %cluster) {
|
|
196 @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
|
|
197 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
|
|
198 for (my $s=0;$s<@mark;$s++) {
|
|
199 if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) {
|
|
200 $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s];
|
|
201 }
|
|
202 }
|
|
203 }
|
|
204
|
|
205 }
|
|
206 foreach my $chr (sort keys %max_cluster) {
|
|
207 for (my $s=0; $s<@mark;$s++) {
|
|
208 # print "$max_cluster{$chr}[$s]\n";
|
|
209 }
|
|
210 }
|
|
211 #---------------------------------------------------------------------------------------
|
|
212 foreach my $chr(keys %cluster) {
|
|
213 for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
|
|
214 my $start=int($cluster{$chr}[$i][1]/$span);
|
|
215 my $end=int($cluster{$chr}[$i][2]/$span);
|
|
216 if ($start==$end) {
|
|
217 for (my $s=0;$s<@mark ;$s++) {
|
|
218 $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s];
|
|
219 }
|
|
220
|
|
221 }
|
|
222 else{
|
|
223 for (my $m=$start;$m<=$end ;$m++) {
|
|
224 for (my $s=0;$s<@mark ;$s++) {
|
|
225 $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s];
|
|
226 }
|
|
227 }
|
|
228 }
|
|
229 }
|
|
230 }
|
|
231 my %max_cluster_density;
|
|
232 my $max_all_density=0;
|
|
233 foreach my $chr (sort keys %cluster) {#
|
|
234 for (my $s=0;$s<@mark ;$s++) {
|
|
235 for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
|
|
236 $max_cluster_density{$chr}[$s]=0;
|
|
237 }
|
|
238 }
|
|
239
|
|
240 }
|
|
241 foreach my $chr (sort keys %cluster_density) {
|
|
242 print "$#{$cluster_density{$chr}}\n";
|
|
243 for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
|
|
244 print TXT "$chr\t$k";
|
|
245 for (my $s=0;$s<@mark;$s++) {
|
|
246 if (!(defined($cluster_density{$chr}[$k][$s]))) {
|
|
247 $cluster_density{$chr}[$k][$s]=0;
|
|
248 }
|
|
249 if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) {
|
|
250 $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s];
|
|
251 }
|
|
252 if ($cluster_density{$chr}[$k][$s]>$max_all_density) {
|
|
253 $max_all_density=$cluster_density{$chr}[$k][$s];
|
|
254 }
|
|
255 print TXT "\t$cluster_density{$chr}[$k][$s]";
|
|
256 }
|
|
257 print TXT "\n";
|
|
258 }
|
|
259 }
|
|
260 print "max density: $max_all_density\n";
|
|
261 #--------------------------------------------------------------------
|
|
262 my $top_margin=30;
|
|
263 my $tail_margin=30;
|
|
264 my $XOFFSET=50;
|
|
265 my $YOFFSET=60;
|
|
266 my $chr_length=600;
|
|
267 my $Xscale=$chr_length/$max_length;#¶¨ÒåXÖá±ÈÀý³ß 1:1000 xÖáµÄ×ø±ê³¤¶È¶¼Òª°´Õմ˱ÈÀý³ß»»Ëã
|
|
268 #my $high_cov=$high_cov9B1=0.5;#¶¨Òå·åͼ×î¸ß·å
|
|
269 #my $Yscale=1/$high_cov;#¶¨ÒåYÖá±ÈÀý³ß 1:60 yÖáµÄ×ø±ê³¤¶È¶¼Òª°´Õմ˱ÈÀý³ß»»Ëã
|
|
270 #========================================New canvas============================
|
|
271 #### Starting ####
|
|
272 #н¨»²¼
|
|
273 my $width=1000;
|
|
274 my $heigth=100+130*$chr_number;
|
|
275 my $svg=SVG->new(width=>$width, height=>$heigth);
|
|
276 #»Í¼Æðʼµã
|
|
277 my $canvas_start_x=$XOFFSET;
|
|
278 my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#°´ÕÕ±ÈÀý³ß »Ïß
|
|
279 my $canvas_start_y=$YOFFSET;
|
|
280 my $canvas_end_y=$YOFFSET;
|
|
281 my $chr_heigth=$heigth-$YOFFSET-$tail_margin;
|
|
282 print "chr number:$chr_number\n";
|
|
283 my $one_chr_heigth=$chr_heigth/$chr_number;
|
|
284 my $Yscale=($one_chr_heigth-15)/$max_all_density;
|
|
285 #my $chr_width=$YOFFSET;
|
|
286 #my $chr_start_y;
|
|
287 #my $chr_end_y;
|
|
288 #my $Yscale=0.01;
|
|
289 #=======================================title of the graph===============================
|
|
290 #my $span_k=$span/1000;
|
|
291 #$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution");
|
|
292 #=======================================the top max chr line=============================
|
|
293 $svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
294 $long_scale=int ($max_length/10);#Ê®µÈ·Ö ´ó¿Ì¶È
|
|
295 #´ó×ø±ê¿Ì¶È
|
|
296 for ($i=0;$i<=10;$i++) {
|
|
297 my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale;
|
|
298 my $long_x_end=$long_x_start;
|
|
299 my $long_y_start=$YOFFSET;
|
|
300 my $long_y_end=$YOFFSET-5;
|
|
301 $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
302 my $Bscale=$long_scale*$i;
|
|
303 my $cdata=int ($Bscale/1000000);
|
|
304 $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M");
|
|
305 }
|
|
306 #=========================================================================================
|
|
307 my $cc=1;
|
|
308 foreach my $chr (sort keys %length) {
|
|
309 my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
|
|
310 my $chr_start_x=$XOFFSET;
|
|
311 my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth;
|
|
312 my $chr_end_y=$chr_start_y;
|
|
313 #$chr_start_y+=$chr_width;
|
|
314 #$chr_end_y+=$chr_width;
|
|
315 # for (my $i=0;$i<@{$gene_desity{$chr}};$i++) {
|
|
316 # print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
|
|
317 # my $red=$gene_desity{$chr}[$i]/$max_gene_number*255;
|
|
318 # my $green=$gene_desity{$chr}[$i]/$max_gene_number*255;
|
|
319 # print "$red\t$green\t0\n";
|
|
320 # $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)");
|
|
321 # }
|
|
322
|
|
323 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
324 $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
|
|
325 my $m_length=$length{$chr}%1000000;
|
|
326 $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M");
|
|
327
|
|
328
|
|
329 if (defined($centromere{$chr}[0])) {
|
|
330 $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
|
|
331 }
|
|
332 for (my $s=0;$s<@mark ;$s++) {
|
|
333 for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) {
|
|
334 #if ($cluster_density{$chr}[$i]*$Yscale>40) {
|
|
335 #$cluster_density{$chr}[$i]=40/$Yscale;
|
|
336 #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
|
|
337 #}
|
|
338 #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n";
|
|
339 my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
|
|
340 my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
|
|
341 my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale;
|
|
342 my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale;
|
|
343 my $c_red=($s+1)/@mark*255;
|
|
344 $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3);
|
|
345 }
|
|
346
|
|
347 }
|
|
348 #=======Y axis
|
|
349 $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
350 #=======Y axis ===>3 xiaoge
|
|
351 my $s10=1;
|
|
352 my $e10=0;
|
|
353 my $chr_max=$max_all_density;
|
|
354 while ($chr_max>10) {
|
|
355 $chr_max=int($chr_max/10);
|
|
356 $s10=$s10*10;
|
|
357 $e10++;
|
|
358 }
|
|
359 $chr_max=$chr_max/2;
|
|
360 #print "*****$max_all_density\t$chr_max\t$s10\n";
|
|
361 for (my $i=1;$i<3 ;$i++) {
|
|
362 my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i;
|
|
363 my $xiaoge_Y=$chr_max*$i;
|
|
364 $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
|
|
365 $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10);
|
|
366 }
|
|
367 $cc++;
|
|
368 }
|
|
369
|
|
370 for (my $s=0;$s<@mark ;$s++) {
|
|
371 my $c_red=($s+1)/@mark*255;
|
|
372 print "**$c_red\n";
|
|
373 $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1);
|
|
374 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]);
|
|
375 }
|
|
376 #
|
|
377 #
|
|
378 if (defined($opt{cen})) {
|
|
379 $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
|
|
380 $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
|
|
381 }
|
|
382
|
|
383 close TXT;
|
|
384
|
|
385 open (OUT,">$opt{o}");
|
|
386 print OUT $svg->xmlify();
|
|
387
|
|
388 sub log2 {
|
|
389 my $n = shift;
|
|
390 return log($n)/log(2);
|
|
391 }
|
|
392
|
|
393 sub usage{
|
|
394 print <<"USAGE";
|
|
395 Version $version
|
|
396 Usage:
|
|
397 $0
|
|
398 options:
|
|
399 -g genelist
|
|
400 -span
|
|
401 -n sample cloumn
|
|
402 -mark sample name
|
|
403 -o output graph file name with html or svg extension
|
|
404 -c cluster file input
|
|
405 -out txt output
|
|
406 -l length of chr
|
|
407 -cen centromere
|
|
408 -h help
|
|
409 USAGE
|
|
410 exit(1);
|
|
411 } |