Mercurial > repos > dereeper > roary_plots
comparison Roary/contrib/roary2svg/roary2svg.pl @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 #!/usr/bin/env perl | |
2 # From Torsten Seemann commit f46312e9df539c56b058f0ef25479d7297ceec89 | |
3 # https://raw.githubusercontent.com/tseemann/nullarbor/master/bin/roary2svg.pl | |
4 use warnings; | |
5 use strict; | |
6 use Data::Dumper; | |
7 use List::Util qw(min max sum); | |
8 use List::MoreUtils qw(uniq all any); | |
9 use Text::CSV; | |
10 use SVG; | |
11 | |
12 use constant FONT_ASPECT => 0.8; | |
13 | |
14 my(@Options, $verbose, $taxacol, $width, $height, $acconly, | |
15 $consensus, $border, $colour, $sepcolour); | |
16 setOptions(); | |
17 | |
18 # read gene_presence_absence.csv from stdin | |
19 # "Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","SRR2352235","SRR2352236","SRR2352237","SRR2352238","SRR2352239","SRR2352240","SRR2352241","SRR2352242","SRR2352243","SRR2352244","SRR2352245","SRR2352246","SRR2352247","SRR2352248","SRR2352249","SRR2352250","SRR2352251","SRR2352252" | |
20 my $csv = Text::CSV->new() or die $!; | |
21 my $count=0; | |
22 my @matrix; | |
23 my @id; | |
24 my $N; | |
25 my $C=0; | |
26 my @tally; # genes per taxon | |
27 my @is_core; # boolean for this cluster being core | |
28 | |
29 while (my $row = $csv->getline(\*ARGV) ) { | |
30 if ($count == 0) { | |
31 @id = splice @$row, $taxacol; | |
32 $N = scalar(@id); | |
33 print STDERR "Found $N taxa: @id\n"; | |
34 } | |
35 else { | |
36 my @present = map { $row->[$taxacol+$_] ? 1 : 0 } (0 .. $N-1); | |
37 my $num_present = sum(@present); | |
38 $is_core[$count] = ($num_present == $N); | |
39 next if $acconly and $is_core[$count]; | |
40 # next if $panonly and all { $_==1 } @present; | |
41 push @{ $matrix[$_] }, $present[$_] for (0 .. $N-1); | |
42 $tally[$_] += $present[$_] for (0 .. $N-1); | |
43 $C++; | |
44 } | |
45 $count++; | |
46 } | |
47 print STDERR "Found $C clusters.\n"; | |
48 | |
49 my $real_height = $height*($N+1); | |
50 my $svg = SVG->new(width=>$width, height=>$real_height); | |
51 my $dy = $height; | |
52 my $fontsize = 0.75 * $dy; | |
53 | |
54 my $lchars = max( map { length($_) } @id ); | |
55 my $llen = $fontsize * (1 + $lchars) * FONT_ASPECT; | |
56 | |
57 my $rchars = max( map { length("$_") } @tally); | |
58 my $rlen = $fontsize * (1 + $rchars) * FONT_ASPECT; | |
59 | |
60 my $width2 = $width - $llen - $rlen; | |
61 my $dx = $width2 / $C; | |
62 my $font_style = { 'font-family'=>'sans-serif', 'fill'=>'black', 'font-size'=>$fontsize }; | |
63 | |
64 print STDERR "Box = $dx x $dy px\n"; | |
65 print STDERR "Left label = $lchars chr x $fontsize px\n"; | |
66 print STDERR "Right label = $rchars chr x $fontsize px\n"; | |
67 | |
68 for my $j (0 .. $N-1) { | |
69 for my $i (0 .. $C-1) { | |
70 # print STDERR "$j $i $matrix[$j][$i]\n"; | |
71 if ($matrix[$j][$i]) { | |
72 # box for each present gene | |
73 $svg->rectangle( | |
74 'x' => $llen+$i*$dx, 'y' => $j*$dy, 'width' => $dx, 'height' => $dy-1, | |
75 'style' => { fill=>$colour, opacity=>($is_core[$i] ? 1 : 0.75) }, | |
76 ); | |
77 } | |
78 } | |
79 # taxon label for each row | |
80 $svg->text( x=>$fontsize, y=>($j+0.75)*$dy, -cdata=>$id[$j], style=>$font_style ); | |
81 # number of genes for each row | |
82 $svg->text( x=>$llen+$width2+$fontsize, y=>($j+0.75)*$dy, -cdata=>$tally[$j], style=>$font_style ); | |
83 # separator line | |
84 my $ypos = ($j+1)*$dy; | |
85 $svg->line( x1=>0, y1=>$ypos, x2=>$width, y2=>$ypos, style=>{stroke=>$sepcolour}); | |
86 } | |
87 | |
88 # bottom label | |
89 my $bottom_text = "$N taxa, $C clusters"; | |
90 $bottom_text .= $acconly ? " (accessory only)" : " (core + accessory)"; | |
91 $svg->text( x=>$llen, y=>($N+0.75)*$dy, -cdata=>$bottom_text, style=>$font_style ); | |
92 | |
93 # border | |
94 if ($border) { | |
95 $svg->rectangle( | |
96 'x' => 0, 'y' => 0, 'width' => $width, 'height' => $real_height, | |
97 'style' => { stroke=>$sepcolour, fill=>'none' }, | |
98 ); | |
99 } | |
100 | |
101 print STDERR "Writing SVG file\n"; | |
102 print STDOUT $svg->xmlify; | |
103 | |
104 print STDERR "Done.\n"; | |
105 | |
106 #---------------------------------------------------------------------- | |
107 # Option setting routines | |
108 | |
109 sub setOptions { | |
110 use Getopt::Long; | |
111 | |
112 @Options = ( | |
113 {OPT=>"help", VAR=>\&usage, DESC=>"This help"}, | |
114 {OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose output"}, | |
115 {OPT=>"width=i", VAR=>\$width, DEFAULT=>1024, DESC=>"Canvas width"}, | |
116 {OPT=>"height=i", VAR=>\$height, DEFAULT=>20, DESC=>"Row height"}, | |
117 {OPT=>"taxacolumn=i", VAR=>\$taxacol, DEFAULT=>14, DESC=>"Column in gpa.csv where taxa begin"}, | |
118 {OPT=>"colour=s", VAR=>\$colour, DEFAULT=>'DimGray', DESC=>"Colour of core cells"}, | |
119 {OPT=>"sepcolour=s", VAR=>\$sepcolour, DEFAULT=>'LightGray', DESC=>"Colour of horizontal separators/borders"}, | |
120 {OPT=>"acconly!", VAR=>\$acconly, DEFAULT=>0, DESC=>"Only draw accessory (non-core) genes"}, | |
121 # {OPT=>"consensus!", VAR=>\$consensus, DEFAULT=>0, DESC=>"Add consensus row"}, | |
122 {OPT=>"border!", VAR=>\$border, DEFAULT=>0, DESC=>"Add outline border"}, | |
123 ); | |
124 | |
125 (!@ARGV) && (usage()); | |
126 | |
127 &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); | |
128 | |
129 # Now setup default values. | |
130 foreach (@Options) { | |
131 if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { | |
132 ${$_->{VAR}} = $_->{DEFAULT}; | |
133 } | |
134 } | |
135 } | |
136 | |
137 sub usage { | |
138 print "Usage: $0 [options] gene_presence_absence.csv > pan_genome.svg\n"; | |
139 foreach (@Options) { | |
140 printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, | |
141 defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; | |
142 } | |
143 exit(1); | |
144 } | |
145 | |
146 #---------------------------------------------------------------------- | |
147 |