Mercurial > repos > cpt > cpt_psm_plotter
comparison cpt_dnaplotter.pl @ 1:8691c1c61a8e draft default tip
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:48:47 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:54c7a3ea81e2 | 1:8691c1c61a8e |
---|---|
1 #!/usr/bin/perl | |
2 # | |
3 # Code written by Eric Rasche | |
4 # mailto:rasche.eric@yandex.ru | |
5 # tel: 404.692.2048 | |
6 # http://eric.rasche.co.uk | |
7 # for | |
8 # Center for Phage Technology | |
9 # | |
10 | |
11 use strict; | |
12 use warnings; | |
13 | |
14 use CPT::GalaxyGetOpt; | |
15 use Data::Dumper; | |
16 use CPT::Bio; | |
17 use CPT::Circos::Conf; | |
18 my $bio = CPT::Bio->new(); | |
19 | |
20 my @argv_copy; | |
21 foreach(@ARGV){push(@argv_copy, "$_");} | |
22 | |
23 my $ggo = CPT::GalaxyGetOpt->new(); | |
24 my $options = $ggo->getOptions( | |
25 'options' => [ | |
26 [ 'file|f', 'Input file', { validate => 'File/Input', | |
27 file_format => ['genbank', 'embl', 'txt'], | |
28 } ], | |
29 [], | |
30 ['Track Configuration'], | |
31 ['track_key', 'Key to select from genbank data', { validate => 'Genomic/Tag', required => 1, multiple => 1 } ], | |
32 ['track_feature_filter_invert', 'Should the qualifier search be inverted?', { validate => 'Option', options => { 'yes', 'Yes', 'no', 'No' } , multiple => 1 } ], | |
33 ['track_feature_filter_hastag', 'Select a tag which should be present in that qualifier (e.g., signal/tmhelix/pseudo)', { validate => 'String' , multiple => 1 } ], | |
34 ['track_feature_filter_textquery', 'Specify text which MUST be in that tag', { validate => 'String' , multiple => 1 } ], | |
35 ['track_feature_filter_strand', 'Which strand should the feature appear on?', { validate => 'Option', options => { 'f', 'Forward', 'r', 'Reverse', 'a', 'Any' } , multiple => 1 } ], | |
36 [], | |
37 ['enable_gc_skew_plot', 'Enable/Disable calculation of GC Skew Plot', { validate => 'Flag' } ], | |
38 ['gc_skew_plot_window_size', 'Window size for calculation of GC Skew', { validate => 'Int', min => 1000, default => 10000} ], | |
39 ['gc_skew_plot_step_size', 'Step size for calculation of GC Skew', { validate => 'Int', min => 200, default => 200 } ], | |
40 ], | |
41 'outputs' => [ | |
42 [ | |
43 'output_circos_confs', | |
44 'Circos Configuration Files', | |
45 { | |
46 validate => 'File/Output', | |
47 required => 1, | |
48 default => 'out', | |
49 data_format => 'archive', | |
50 default_format => 'zip', | |
51 } | |
52 ], | |
53 ], | |
54 'defaults' => [ | |
55 'appid' => 'CircularDNAPlotter', | |
56 'appname' => 'Circos based DNAPlotter', | |
57 'appdesc' => 'plots genomes similar to Artemis\'s DNAPlotter', | |
58 'appvers' => '1.94.1', | |
59 ], | |
60 ); | |
61 | |
62 #perl cpt_dnaplotter.pl \ | |
63 #-f ../t/test-files/moon.gbk \ | |
64 #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand f \ | |
65 #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand r \ | |
66 #--track_key CDS --track_feature_filter_hastag pseudo --track_feature_filter_strand a \ | |
67 #--track_key tRNA --track_feature_filter_strand a \ | |
68 #--track_key CDS --track_feature_filter_hastag signal --track_feature_filter_strand a \ | |
69 #--track_key CDS --track_feature_filter_hastag tmhelix --track_feature_filter_strand a | |
70 | |
71 | |
72 my @reorg_args = (); | |
73 | |
74 my $cum_gc_ske_mean = 0; | |
75 | |
76 | |
77 | |
78 my %latest = (); | |
79 for(my $i = 0; $i < scalar(@argv_copy); $i++){ | |
80 my $c = $argv_copy[$i]; | |
81 # We have entered a new one block | |
82 if($c eq '--track_key'){ | |
83 # If we have loaded data | |
84 if(scalar(keys(%latest)) > 0){ | |
85 my %copy; | |
86 foreach(keys(%latest)){ | |
87 $copy{$_} = "" . $latest{$_}; | |
88 } | |
89 push(@reorg_args, \%copy); | |
90 } | |
91 | |
92 # Clean out latest to prep for new data | |
93 foreach(keys(%latest)){ | |
94 delete $latest{$_}; | |
95 } | |
96 } | |
97 | |
98 if($c =~ /^--track_(.*)/){ | |
99 $latest{$1} = $argv_copy[$i+1]; | |
100 # Artificially bump so we don't bother looking at the answer to | |
101 # this question. We can "get away" with this because none of | |
102 # the options are flags. However, I've disabled it in the event | |
103 # that flags ARE introduced | |
104 #$i++; | |
105 } | |
106 } | |
107 push(@reorg_args, \%latest); | |
108 #$VAR1 = [ | |
109 #{ | |
110 #'feature_filter_invert' => 'yes', | |
111 #'feature_plot_color' => '005500', | |
112 #'feature_filter_strand' => 'f', | |
113 #'feature_filter_hastag' => 'pseudo', | |
114 #'key' => 'CDS' | |
115 #}, | |
116 #{ | |
117 #'feature_filter_strand' => 'a', | |
118 #'key' => 'RBS' | |
119 #} | |
120 #]; | |
121 my @files = (); | |
122 | |
123 my $number_of_tracks = 0; | |
124 sub register_track { | |
125 #my $r0 = ( 90 - (10 * $number_of_tracks - 1)/2) / 100; | |
126 #my $r1 = ( 90 - (10 * $number_of_tracks - 9)/2) / 100; | |
127 my $r0 = ( 90 - (10 * $number_of_tracks - 1)/1) / 100; | |
128 my $r1 = ( 90 - (10 * $number_of_tracks - 9)/1) / 100; | |
129 $number_of_tracks++; | |
130 return ($r0, $r1); | |
131 } | |
132 | |
133 sub circosconf { | |
134 my $cc = CPT::Circos::Conf->new(); | |
135 $cc->include('etc/colors_fonts_patterns.conf'); | |
136 # Features to plot along the genome | |
137 $cc->include('ideogram.conf'); | |
138 # markings indicating position along genome | |
139 $cc->include('ticks.conf'); | |
140 # Genome data | |
141 $cc->set('karyotype', 'karyotype.conf'); | |
142 # Default image params are fine | |
143 $cc->start_block('image'); | |
144 $cc->include('etc/image.conf'); | |
145 $cc->end_block(); | |
146 # ??? | |
147 $cc->set('chromosome_units', '1000'); | |
148 $cc->set('chromosome_display_default', 'yes'); | |
149 #$cc->include('highlights.conf'); | |
150 $cc->include('plots.conf'); | |
151 | |
152 $cc->include('etc/housekeeping.conf'); | |
153 my $result = $cc->finalize(); | |
154 $cc = CPT::Circos::Conf->new(); | |
155 return $result; | |
156 } | |
157 sub ideogramconf{ | |
158 my $cc = CPT::Circos::Conf->new(); | |
159 $cc->start_block('ideogram'); | |
160 $cc->start_block('spacing'); | |
161 $cc->set('default','0u'); | |
162 $cc->set('break','0u'); | |
163 $cc->end_block(); | |
164 | |
165 $cc->set('thickness', '20p'); | |
166 $cc->set('stroke_thickness', '2'); | |
167 $cc->set('stroke_color', 'black'); | |
168 $cc->set('fill','no'); | |
169 $cc->set('fill_color','black'); | |
170 $cc->set('radius','0.85r'); | |
171 $cc->set('show_label','yes'); | |
172 $cc->set('label_font','default'); | |
173 $cc->set('label_radius','dims(ideogram,radius) + 0.05'); | |
174 $cc->set('label_size','36'); | |
175 $cc->set('label_parallel','yes'); | |
176 $cc->set('label_case','upper'); | |
177 | |
178 $cc->set('band_stroke_thickness','2'); | |
179 $cc->set('show_bands','yes'); | |
180 $cc->set('fill_bands','yes'); | |
181 $cc->end_block(); | |
182 | |
183 return $cc->finalize(); | |
184 } | |
185 sub generate_feature_table { | |
186 my ($filename, %filter) = @_; | |
187 print "Filtering on features\n"; | |
188 print Dumper \%filter; | |
189 my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); | |
190 # Only handing first sequence. | |
191 my $seq_object = $seqio_object->next_seq; | |
192 my $parent = $seq_object->display_id(); | |
193 # Feature data | |
194 my @features; | |
195 foreach my $feat($seq_object->get_SeqFeatures()){ | |
196 if($feat->primary_tag() eq $filter{key}){ | |
197 # If they've said "hastag" AND we do indeed have that tag AND we haven't inverted this filter. | |
198 if( | |
199 ($filter{feature_filter_hastag} && $feat->has_tag($filter{feature_filter_hastag}) && !$filter{feature_filter_invert}) | |
200 || | |
201 ($filter{feature_filter_hastag} && $filter{feature_filter_invert} && !$feat->has_tag($filter{feature_filter_hastag})) | |
202 || | |
203 (! $filter{feature_filter_hastag}) | |
204 ){ | |
205 if( | |
206 !$filter{feature_filter_strand} | |
207 || | |
208 ($feat->strand() == 1 && ( $filter{feature_filter_strand} eq 'f' || $filter{feature_filter_strand} eq 'a' )) | |
209 || | |
210 ($feat->strand() == -1 && ( $filter{feature_filter_strand} eq 'r' || $filter{feature_filter_strand} eq 'a' )) | |
211 || | |
212 ($feat->strand() == 0 && ( $filter{feature_filter_strand} eq 'a' )) | |
213 ){ | |
214 push(@features, join(' ', $parent, $feat->start, $feat->end)); | |
215 } | |
216 } | |
217 } | |
218 } | |
219 print "Found " . scalar @features . " features \n"; | |
220 push(@files, [ 'data/' . $filename, join("\n", @features) ] ); | |
221 } | |
222 sub plotsconf{ | |
223 my $cc = CPT::Circos::Conf->new(); | |
224 | |
225 $cc->start_block('plots'); | |
226 | |
227 | |
228 my $idx = 0; | |
229 foreach(@reorg_args){ | |
230 my %filter = %{$_}; | |
231 #{ | |
232 #'feature_filter_invert' => 'yes', | |
233 #'feature_plot_color' => '005500', | |
234 #'feature_filter_strand' => 'f', | |
235 #'feature_filter_hastag' => 'pseudo', | |
236 #'key' => 'CDS' | |
237 #}, | |
238 $idx++; | |
239 my $filename = sprintf('org.features.%s.txt', $idx); | |
240 generate_feature_table($filename, %filter); | |
241 | |
242 my ($r0,$r1) = register_track(); | |
243 $cc->start_block('plot'); | |
244 $cc->set('type','tile'); | |
245 $cc->set('file',$filename); | |
246 $cc->set('orientation', 'center'); | |
247 $cc->set('thickness', '30'); | |
248 $cc->set('r1', $r1 . 'r');# '0.78r'); | |
249 $cc->set('r0', $r0 . 'r');# '0.72r'); | |
250 $cc->set('layers','3'); | |
251 $cc->set('layers_overflow','collapse'); | |
252 $cc->set('color','paired-6-qual-' . $idx); | |
253 $cc->end_block(); | |
254 } | |
255 | |
256 if($options->{enable_gc_skew_plot}){ | |
257 my ($r0,$r1) = register_track(); | |
258 $cc->start_block('plot'); | |
259 $cc->set('type','histogram'); | |
260 $cc->set('file','gc.txt'); | |
261 $cc->set('r1',$r1 . 'r'); | |
262 $cc->set('r0',$r0 . 'r'); | |
263 $cc->set('fill_color','purple'); | |
264 $cc->set('orientation','in'); | |
265 $cc->start_block('rules'); | |
266 $cc->start_block('rule'); | |
267 $cc->set('condition','var(value) < 0'); | |
268 $cc->set('fill_color', 'green'); | |
269 $cc->end_block(); | |
270 $cc->end_block(); | |
271 $cc->end_block(); | |
272 } | |
273 | |
274 #$cc->start_block('plot'); | |
275 #$cc->set('type','histogram'); | |
276 #$cc->set('file','gc_cumulative.txt'); | |
277 #$cc->set('r1','0.6r'); | |
278 #$cc->set('r0','0.55r'); | |
279 #$cc->set('fill_color','purple'); | |
280 #$cc->set('orientation','out'); | |
281 #$cc->start_block('rules'); | |
282 #$cc->start_block('rule'); | |
283 #$cc->set('condition','var(value) < ' . $cum_gc_ske_mean); | |
284 #$cc->set('fill_color', 'green'); | |
285 #$cc->end_block(); | |
286 #$cc->end_block(); | |
287 #$cc->end_block(); | |
288 | |
289 $cc->end_block(); | |
290 return $cc->finalize(); | |
291 } | |
292 sub ticksconf{ | |
293 my $cc = CPT::Circos::Conf->new(); | |
294 | |
295 $cc->set('show_ticks','yes'); | |
296 $cc->set('show_tick_labels','yes'); | |
297 $cc->start_block('ticks'); | |
298 $cc->set('radius','1r'); | |
299 $cc->set('color','black'); | |
300 $cc->set('thickness','2p'); | |
301 $cc->set('multiplier','1e-3'); | |
302 $cc->set('format','%d'); | |
303 | |
304 $cc->start_block('tick'); | |
305 $cc->set('spacing','1000u'); | |
306 $cc->set('size','10p'); | |
307 $cc->end_block(); | |
308 | |
309 $cc->start_block('tick'); | |
310 $cc->set('spacing','10000u'); | |
311 $cc->set('size','15p'); | |
312 $cc->set('show_label','yes'); | |
313 $cc->set('label_size','20p'); | |
314 $cc->set('label_offset','10p'); | |
315 $cc->set('format','%d'); | |
316 $cc->end_block(); | |
317 $cc->end_block(); | |
318 return $cc->finalize(); | |
319 } | |
320 sub karyotype { | |
321 my @karyotype_data = (); | |
322 my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); | |
323 # Only handing first sequence. | |
324 my $seq_object = $seqio_object->next_seq; | |
325 # Main 'chromosome' data | |
326 push(@karyotype_data, join(' ', 'chr', '-',$seq_object->display_id(),$seq_object->display_id(),0, $seq_object->length(),'black')); | |
327 | |
328 return join("\n", @karyotype_data); | |
329 } | |
330 | |
331 sub gcgraph_cumulative { | |
332 my @gcdata = (); | |
333 my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); | |
334 # Only handing first sequence. | |
335 my $seq_object = $seqio_object->next_seq; | |
336 | |
337 my $parent = $seq_object->display_id(); | |
338 | |
339 my $seq = $seq_object->seq(); | |
340 my $sep = int($options->{gc_skew_plot_window_size}/2); | |
341 my $stepsep = int($options->{gc_skew_plot_step_size}/2); | |
342 my $cumulative_gc_skew = 0; | |
343 my @cumgc_vals; | |
344 | |
345 my $count = 0; | |
346 foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ | |
347 $count++; | |
348 $cumulative_gc_skew += _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), | |
349 push(@cumgc_vals, $cumulative_gc_skew); | |
350 push(@gcdata, join(" ", | |
351 $parent, | |
352 $i - $stepsep, | |
353 $i + $stepsep, | |
354 $cumulative_gc_skew | |
355 )); | |
356 } | |
357 | |
358 my $sum = 0; | |
359 foreach(@cumgc_vals){$sum += $_;} | |
360 $cum_gc_ske_mean = $sum / $count; | |
361 | |
362 return join("\n", @gcdata); | |
363 # Main 'chromosome' data | |
364 } | |
365 sub gcgraph { | |
366 my @gcdata = (); | |
367 my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); | |
368 # Only handing first sequence. | |
369 my $seq_object = $seqio_object->next_seq; | |
370 | |
371 my $parent = $seq_object->display_id(); | |
372 | |
373 my $seq = $seq_object->seq(); | |
374 my $sep = int($options->{gc_skew_plot_window_size}/2); | |
375 my $stepsep = int($options->{gc_skew_plot_step_size}/2); | |
376 foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ | |
377 push(@gcdata, join(" ", | |
378 $parent, | |
379 $i - $stepsep, | |
380 $i + $stepsep, | |
381 _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), | |
382 )); | |
383 } | |
384 return join("\n", @gcdata); | |
385 # Main 'chromosome' data | |
386 } | |
387 sub _calculate_gc_skew_for_seq { | |
388 my ($seq) = @_; | |
389 $seq = uc($seq); | |
390 my %counts; | |
391 foreach(split //,$seq){ | |
392 $counts{$_}++; | |
393 } | |
394 if($counts{G} + $counts{C} > 0){ | |
395 return ($counts{G} - $counts{C}) / ($counts{G} + $counts{C}); | |
396 } | |
397 return 0; | |
398 } | |
399 | |
400 push(@files, [ 'etc/karyotype.conf', karyotype() ]); | |
401 push(@files, [ 'etc/circos.conf', circosconf() ]); | |
402 push(@files, [ 'etc/ideogram.conf', ideogramconf() ]); | |
403 if($options->{enable_gc_skew_plot}){ | |
404 push(@files, [ 'data/gc.txt', gcgraph() ]); | |
405 push(@files, [ 'data/gc_cumulative.txt', gcgraph_cumulative() ]); | |
406 } | |
407 | |
408 push(@files, [ 'etc/plots.conf', plotsconf() ]); | |
409 push(@files, [ 'etc/ticks.conf', ticksconf() ]); | |
410 | |
411 | |
412 use Archive::Any::Create; | |
413 my $archive = Archive::Any::Create->new(); | |
414 $archive->container('conf'); | |
415 foreach(@files){ | |
416 my ($location, $content) = @{$_}; | |
417 $archive->add_file($location, $content); | |
418 } | |
419 | |
420 use CPT::OutputFiles; | |
421 my $crr_output = CPT::OutputFiles->new( | |
422 name => 'output_circos_confs', | |
423 GGO => $ggo, | |
424 ); | |
425 $crr_output->CRR(data => $archive); | |
426 | |
427 =head1 NAME | |
428 | |
429 DNAPlotter | |
430 | |
431 =head1 DESCRIPTION | |
432 | |
433 Much like artemis's DNAPlotter, this tool plots genomes in a ciruclar fashion, and can plot gc-deviation tracks as well. The options are somewhat reduced compared to artemis, so if you need something that isn't available in this version please file a bug report. | |
434 | |
435 =head1 USE | |
436 | |
437 Each track has several parameters: | |
438 | |
439 =over 4 | |
440 | |
441 =item track_key | |
442 | |
443 This selects a set of features from a genbank file, e.g., CDSs or tRNAs | |
444 | |
445 =item track_feature_filter_invert | |
446 | |
447 This parameter will invert (negate) the results of whatever query parameters you specify after it. | |
448 | |
449 =item track_feature_filter_hastag | |
450 | |
451 Require that a feature has a specific tag.... | |
452 | |
453 =item track_feature_filter_textquery | |
454 | |
455 ...with this specific text in it | |
456 | |
457 =item track_feature_filter_strand | |
458 | |
459 Which strand should the feature be on (not inverted) | |
460 | |
461 =back | |
462 | |
463 Additionally, users are able to enable/disable GC skew plots. it's suggested that these are generally left alone, as they can quickly increase runtime. | |
464 | |
465 =cut |