comparison bismark_pretty_report/bismark2report @ 7:fcadce4d9a06 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author bgruening
date Sat, 06 May 2017 13:18:09 -0400
parents
children
comparison
equal deleted inserted replaced
6:0f8646f22b8d 7:fcadce4d9a06
1 #!/usr/bin/env perl
2 use warnings;
3 use strict;
4 use Getopt::Long;
5 use FindBin qw($Bin);
6 use lib "$Bin/../lib";
7
8 ## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)
9
10 ## This program is free software: you can redistribute it and/or modify
11 ## it under the terms of the GNU General Public License as published by
12 ## the Free Software Foundation, either version 3 of the License, or
13 ## (at your option) any later version.
14
15 ## This program is distributed in the hope that it will be useful,
16 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ## GNU General Public License for more details.
19
20 ## You should have received a copy of the GNU General Public License
21 ## along with this program. If not, see <http://www.gnu.org/licenses/>.
22
23 my $bismark2report_version = 'v0.16.3';
24 my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports);
25
26 my ($output_dir,$verbose,$manual_output_file) = process_commandline();
27
28 # print join (",",@alignment_reports)."\n";
29 # print join (",",@dedup_reports)."\n";
30 # print join (",",@splitting_reports)."\n";
31 # print join (",",@mbias_reports)."\n";
32 # print join (",",@nuc_reports)."\n";
33
34 while (@alignment_reports){
35
36 my $alignment_report = shift @alignment_reports;
37 my $dedup_report = shift @dedup_reports;
38 my $splitting_report = shift @splitting_reports;
39 my $mbias_report = shift @mbias_reports;
40 my $nuc_report = shift @nuc_reports;
41
42 ### HTML OUTPUT FILE
43 my $report_output = $alignment_report;
44 $report_output =~ s/^.*\///; # deleting optional path information
45 $report_output =~ s/\.txt$//;
46 $report_output =~ s/$/.html/;
47
48 # if -o output_file was specified we are going to use that name preferentially. This may only happen if there is a single report in the folder, or if a single report has been specified manually
49 if ($manual_output_file){
50 warn "A specific output filename was specified: $manual_output_file. Using that one instead of deriving the filename\n"; sleep(1);
51 $report_output = $manual_output_file;
52 }
53
54 $report_output = $output_dir.$report_output;
55 warn "\nWriting Bismark HTML report to >> $report_output <<\n\n";
56
57 my $doc = read_report_template(); #reading and storing the entire report template
58
59 # BISMARK ALIGNMENT REPORT (mandatory)
60 warn "="x110,"\n";
61 warn "Using the following alignment report:\t\t> $alignment_report <\n";
62 # DEDUPLICATION REPORT (optional)
63 if ($dedup_report){
64 warn "Using the following deduplication report:\t> $dedup_report <\n";
65 }
66 else{
67 warn "No deduplication report file specified, skipping this step\n";
68 }
69
70 # SPLITTING REPORT (optional)
71 if ($splitting_report){
72 warn "Using the following splitting report:\t\t> $splitting_report <\n";
73 }
74 else{
75 warn "No splitting report file specified, skipping this step\n";
76 }
77
78 # M-BIAS REPORT (optional)
79 if ($mbias_report){
80 warn "Using the following M-bias report:\t\t> $mbias_report <\n";
81 }
82 else{
83 warn "No M-bias report file specified, skipping this step\n";
84 }
85
86 # NUCLEOTIDE COVERAGE REPORT (optional)
87 if ($nuc_report){
88 warn "Using the following nucleotide coverage report:\t> $nuc_report <\n";
89 }
90 else{
91 warn "No nucleotide coverage report file specified, skipping this step\n";
92 }
93 warn "="x110,"\n\n\n";
94 $verbose and sleep(3);
95
96 # creating timestamp
97 $doc = getLoggingTime($doc);
98
99 $doc = read_alignment_report($alignment_report,$doc); # mandatory
100
101 if ($dedup_report){ # optional
102 $doc = read_deduplication_report($dedup_report,$doc);
103
104 # removing the delete tags in the html template
105 $doc =~ s/\{\{start_deletion_duplication\}\}//g;
106 $doc =~ s/\{\{end_deletion_duplication\}\}//g;
107 }
108 else{
109 # removing the entire graph and table section for the deduplication part
110 warn "Removing the duplication section from the HTML report\n" if ($verbose);
111 $doc =~ s/(\{\{start_deletion_duplication.*?end_deletion_duplication\}\})//gs; # s includes newline chars; non-greedy
112 warn "Deleting the following content:\n$1\n\n" if ($verbose);
113 sleep(3) if ($verbose);
114 }
115
116 if ($nuc_report){ # optional
117 $doc = read_nucleotide_coverage_report($nuc_report,$doc);
118
119 # removing the delete tags in the html template
120 $doc =~ s/\{\{start_deletion_nucleotide_coverage\}\}//g;
121 $doc =~ s/\{\{end_deletion_nucleotide_coverage\}\}//g;
122 }
123 else{
124 # removing the entire graph and table section for the nucleotide coverage part
125 warn "Removing the nucleotide coverage section from the HTML report\n" if ($verbose);
126 $doc =~ s/(\{\{start_deletion_nucleotide_coverage.*?end_deletion_nucleotide_coverage\}\})//gs; # s includes newline chars; non-greedy
127 warn "Deleting the following content:\n$1\n\n" if ($verbose);
128 sleep(3) if ($verbose);
129 }
130
131 if ($splitting_report){ # optional
132 $doc = read_splitting_report($splitting_report,$doc);
133
134 # removing the delete tags in the html template
135 $doc =~ s/\{\{start_deletion_splitting\}\}//g;
136 $doc =~ s/\{\{end_deletion_splitting\}\}//g;
137 }
138 else{
139 # removing the entire graph and table section for the deduplication part
140 warn "Removing the splitting report section from the HTML report\n" if ($verbose);
141 $doc =~ s/(\{\{start_deletion_splitting.*?end_deletion_splitting\}\})//gs; # s includes newline chars; non-greedy
142 warn "Deleting the following content:\n$1\n\n" if ($verbose);
143 sleep(3) if ($verbose);
144 }
145
146 if ($mbias_report){ # optional
147 (my $state, $doc) = read_mbias_report($mbias_report,$doc);
148
149 # removing the delete tags in the html template
150 $doc =~ s/\{\{start_deletion_mbias\}\}//g;
151 $doc =~ s/\{\{end_deletion_mbias\}\}//g;
152
153 warn "Reported read state: $state\n\n" if ($verbose);
154 # if the report was single-end we need to remove the second plot only
155 if ($state eq 'single'){
156 $doc =~ s/(\{\{start_deletion_mbias_2.*?end_deletion_mbias_2\}\})//gs; # s includes newline chars; non-greedy
157 warn "Deleting the following content:\n$1\n\n" if ($verbose);
158 }
159 else{
160 $doc =~ s/\{\{start_deletion_mbias_2\}\}//g;
161 $doc =~ s/\{\{end_deletion_mbias_2\}\}//g;
162 }
163 }
164 else{
165 # removing the entire graph and table section for the deduplication part
166 $doc =~ s/(\{\{start_deletion_mbias.*?end_deletion_mbias\}\})//gs; # s includes newline chars; non-greedy
167 warn "Deleting the following content:\n$1\n\n" if ($verbose);
168 sleep(3) if ($verbose);
169 }
170
171 write_out_report($report_output,$doc);
172
173 }
174
175 sub write_out_report{
176 my ($report_output,$doc) = @_;
177 open (OUT,'>',$report_output) or die "Failed to write to output file $report_output: $!\n\n";
178 print OUT $doc;
179 }
180
181 sub getLoggingTime {
182 my $doc = shift;
183 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
184
185 my $time = sprintf ("%02d:%02d", $hour,$min,$sec);
186 my $date = sprintf ("%04d-%02d-%02d", $year+1900,$mon+1,$mday);
187 warn "Using Time: $time, and date: $date\n\n" if ($verbose);
188
189 $doc =~ s/\{\{date\}\}/$date/g;
190 $doc =~ s/\{\{time\}\}/$time/g;
191
192 return $doc;
193 }
194
195
196 sub read_alignment_report{
197
198 my ($alignment_report,$doc) = @_;
199
200 warn "Processing alignment report $alignment_report ...\n";
201 open (ALN,$alignment_report) or die "Couldn't read from file $alignment_report: $!\n\n";
202
203 my $unique;
204 my $no_aln;
205 my $multiple;
206 my $no_genomic;
207 my $total_seqs;
208 my $bismark_version;
209 my $input_filename;
210
211 my $unique_text;
212 my $no_aln_text;
213 my $multiple_text;
214 my $total_seq_text;
215
216 my $total_C_count;
217 my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
218 my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
219 my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);
220
221 my $number_OT;
222 my $number_CTOT;
223 my $number_CTOB;
224 my $number_OB;
225
226 while (<ALN>){
227 chomp;
228
229 ### General Alignment stats
230 if ($_ =~ /^Sequence pairs analysed in total:/ ){ ## Paired-end
231 (undef,$total_seqs) = split /\t/;
232 print "Total paired seqs: >> $total_seqs <<\n" if ($verbose);
233 $total_seq_text = 'Sequence pairs analysed in total';
234 }
235 elsif ($_ =~ /^Sequences analysed in total:/ ){ ## Single-end
236 (undef,$total_seqs) = split /\t/;
237 $total_seq_text = 'Sequences analysed in total';
238 print "total single-end seqs >> $total_seqs <<\n" if ($verbose);
239 }
240
241 elsif($_ =~ /^Bismark report for: (.*) \(version: (.*)\)/){
242 $input_filename = $1;
243 $bismark_version = $2;
244 print "Input filename(s) >> $input_filename <<\n" if ($verbose);
245 print "Bismark version >> $bismark_version <<\n" if ($verbose);
246 }
247
248 elsif($_ =~ /^Number of paired-end alignments with a unique best hit:/){ ## Paired-end
249 (undef,$unique) = split /\t/;
250 print "Unique PE>> $unique <<\n" if ($verbose);;
251 $unique_text = 'Paired-end alignments with a unique best hit';
252 }
253 elsif($_ =~ /^Number of alignments with a unique best hit from/){ ## Single-end
254 (undef,$unique) = split /\t/;
255 print "Unique SE>> $unique <<\n" if ($verbose);
256 $unique_text = 'Single-end alignments with a unique best hit';
257 }
258
259 elsif($_ =~ /^Sequence pairs with no alignments under any condition:/){ ## Paired-end
260 (undef,$no_aln) = split /\t/;
261 print "No alignment PE >> $no_aln <<\n" if ($verbose);
262 $no_aln_text = 'Pairs without alignments under any condition';
263 }
264 elsif($_ =~ /^Sequences with no alignments under any condition:/){ ## Single-end
265 (undef,$no_aln) = split /\t/;
266 print "No alignments SE>> $no_aln <<\n" if ($verbose);
267 $no_aln_text = 'Sequences without alignments under any condition';
268 }
269
270 elsif($_ =~ /^Sequence pairs did not map uniquely:/){ ## Paired-end
271 (undef,$multiple) = split /\t/;
272 print "Multiple alignments PE >> $multiple <<\n" if ($verbose);
273 $multiple_text = 'Pairs that did not map uniquely';
274 }
275 elsif($_ =~ /^Sequences did not map uniquely:/){ ## Single-end
276 (undef,$multiple) = split /\t/;
277 print "Multiple alignments SE >> $multiple <<\n" if ($verbose);
278 $multiple_text = 'Sequences that did not map uniquely';
279 }
280
281 elsif($_ =~ /^Sequence pairs which were discarded because genomic sequence could not be extracted:/){ ## Paired-end
282 (undef,$no_genomic) = split /\t/;
283 print "No genomic sequence PE >> $no_genomic <<\n" if ($verbose);
284 }
285 elsif($_ =~ /^Sequences which were discarded because genomic sequence could not be extracted:/){ ## Single-end
286 (undef,$no_genomic) = split /\t/;
287 print "No genomic sequence SE>> $no_genomic <<\n" if ($verbose);
288 }
289
290 ### Context Methylation
291 elsif($_ =~ /^Total number of C/ ){
292 (undef,$total_C_count) = split /\t/;
293 print "Total number C >> $total_C_count <<\n" if ($verbose);
294 }
295
296 elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
297 (undef,$meth_CpG) = split /\t/;
298 print "meth CpG >> $meth_CpG <<\n" if ($verbose);
299 }
300 elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
301 (undef,$meth_CHG) = split /\t/;
302 print "meth CHG >> $meth_CHG <<\n" if ($verbose);
303 }
304 elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
305 (undef,$meth_CHH) = split /\t/;
306 print "meth CHH >> $meth_CHH <<\n" if ($verbose);
307 }
308 elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
309 (undef,$meth_unknown) = split /\t/;
310 print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
311 }
312
313 elsif($_ =~ /^Total unmethylated C\'s in CpG context:/ or $_ =~ /^Total C to T conversions in CpG context:/){
314 (undef,$unmeth_CpG) = split /\t/;
315 print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
316 }
317 elsif($_ =~ /^Total unmethylated C\'s in CHG context:/ or $_ =~ /^Total C to T conversions in CHG context:/){
318 (undef,$unmeth_CHG) = split /\t/;
319 print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
320 }
321 elsif($_ =~ /^Total unmethylated C\'s in CHH context:/ or $_ =~ /^Total C to T conversions in CHH context:/){
322 (undef,$unmeth_CHH) = split /\t/;
323 print "unmeth CHH >> $unmeth_CHH <<\n"if ($verbose);
324 }
325 elsif($_ =~ /^Total unmethylated C\'s in Unknown context:/ or $_ =~ /^Total C to T conversions in Unknown context:/){
326 (undef,$unmeth_unknown) = split /\t/;
327 print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
328 }
329
330 elsif($_ =~ /^C methylated in CpG context:/ ){
331 (undef,$perc_CpG) = split /\t/;
332 $perc_CpG =~ s/%//;
333 print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
334 }
335 elsif($_ =~ /^C methylated in CHG context:/ ){
336 (undef,$perc_CHG) = split /\t/;
337 $perc_CHG =~ s/%//;
338 print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
339 }
340 elsif($_ =~ /^C methylated in CHH context:/ ){
341 (undef,$perc_CHH) = split /\t/;
342 $perc_CHH =~ s/%//;
343 print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
344 }
345 elsif($_ =~ /^C methylated in Unknown context:/ ){
346 (undef,$perc_unknown) = split /\t/;
347 $perc_unknown =~ s/%//;
348 print "percentage Unknown >> $perc_unknown <<\n" if ($verbose);
349 }
350
351
352 ### Strand Origin
353
354 elsif($_ =~ /^CT\/GA\/CT:/ ){ ## Paired-end
355 (undef,$number_OT) = split /\t/;
356 print "Number OT PE>> $number_OT <<\n" if ($verbose);
357 }
358 elsif($_ =~ /^CT\/CT:/ ){ ## Single-end
359 (undef,$number_OT) = split /\t/;
360 print "Number OT SE>> $number_OT <<\n" if ($verbose);
361 }
362
363 elsif($_ =~ /^GA\/CT\/CT:/ ){ ## Paired-end
364 (undef,$number_CTOT) = split /\t/;
365 print "Number CTOT PE >> $number_CTOT <<\n" if ($verbose);
366 }
367 elsif($_ =~ /^GA\/CT:/ ){ ## Single-end
368 (undef,$number_CTOT) = split /\t/;
369 print "Number CTOT SE >> $number_CTOT <<\n" if ($verbose);
370 }
371
372 elsif($_ =~ /^GA\/CT\/GA:/ ){ ## Paired-end
373 (undef,$number_CTOB) = split /\t/;
374 print "Number CTOB PE >> $number_CTOB <<\n" if ($verbose);
375 }
376 elsif($_ =~ /^GA\/GA:/ ){ ## Single-end
377 (undef,$number_CTOB) = split /\t/;
378 print "Number CTOB SE >> $number_CTOB <<\n" if ($verbose);
379 }
380
381 elsif($_ =~ /^CT\/GA\/GA:/ ){ ## Paired-end
382 (undef,$number_OB) = split /\t/;
383 print "Number OB PE >> $number_OB <<\n" if ($verbose);
384 }
385 elsif($_ =~ /^CT\/GA:/ ){ ## Single-end
386 (undef,$number_OB) = split /\t/;
387 print "Number OB SE >> $number_OB <<\n" if ($verbose);
388 }
389
390
391 }
392
393 if (defined $unique and defined $no_aln and defined $multiple and defined $no_genomic and defined $total_seqs){
394 warn "Got all necessary information, editing HTML report\n" if ($verbose);
395
396 ### General Alignment Stats
397 $doc =~ s/\{\{unique_seqs\}\}/$unique/g;
398 $doc =~ s/\{\{unique_seqs_text\}\}/$unique_text/g;
399
400 $doc =~ s/\{\{no_alignments\}\}/$no_aln/g;
401 $doc =~ s/\{\{no_alignments_text\}\}/$no_aln_text/g;
402
403 $doc =~ s/\{\{multiple_alignments\}\}/$multiple/g;
404 $doc =~ s/\{\{multiple_alignments_text\}\}/$multiple_text/g;
405
406 $doc =~ s/\{\{no_genomic\}\}/$no_genomic/g;
407
408 $doc =~ s/\{\{total_sequences_alignments\}\}/$total_seqs/g;
409 $doc =~ s/\{\{sequences_analysed_in_total\}\}/$total_seq_text/g;
410
411 $doc =~ s/\{\{filename\}\}/$input_filename/g;
412 $doc =~ s/\{\{bismark_version\}\}/$bismark_version/g;
413
414 ### Strand Origin
415 $doc =~ s/\{\{number_OT\}\}/$number_OT/g;
416 $doc =~ s/\{\{number_CTOT\}\}/$number_CTOT/g;
417 $doc =~ s/\{\{number_CTOB\}\}/$number_CTOB/g;
418 $doc =~ s/\{\{number_OB\}\}/$number_OB/g;
419
420 ### Context Methylation
421 $doc =~ s/\{\{total_C_count\}\}/$total_C_count/g;
422
423 unless (defined $perc_CpG){
424 $perc_CpG = 'N/A';
425 }
426 unless (defined $perc_CHG){
427 $perc_CHG = 'N/A';
428 }
429 unless (defined $perc_CHH){
430 $perc_CHH = 'N/A';
431 }
432 unless (defined $perc_unknown){
433 $perc_unknown = 'N/A';
434 }
435
436 ### Unknown sequence context, just for Bowtie 2 alignments
437 my $meth_unknown_inject;
438 my $unmeth_unknown_inject;
439 my $perc_unknown_inject;
440
441 if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
442 $meth_unknown_inject = " <tr>
443 <th>Methylated C's in Unknown context</th>
444 <td>$meth_unknown</td>
445 </tr>";
446 $unmeth_unknown_inject = " <tr>
447 <th>Unmethylated C's in Unknown context</th>
448 <td>$unmeth_unknown</td>
449 </tr>";
450 $perc_unknown_inject = " <tr>
451 <th>Methylated C's in Unknown context</th>
452 <td>$perc_unknown%</td>
453 </tr>";
454 }
455 else{
456 $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
457 }
458
459 ### injecting this into the table
460 $doc =~ s/\{\{meth_unknown\}\}/$meth_unknown_inject/g;
461 $doc =~ s/\{\{unmeth_unknown\}\}/$unmeth_unknown_inject/g;
462 $doc =~ s/\{\{perc_unknown\}\}/$perc_unknown_inject/g;
463
464
465 $doc =~ s/\{\{meth_CpG\}\}/$meth_CpG/g;
466 $doc =~ s/\{\{meth_CHG\}\}/$meth_CHG/g;
467 $doc =~ s/\{\{meth_CHH\}\}/$meth_CHH/g;
468
469 $doc =~ s/\{\{unmeth_CpG\}\}/$unmeth_CpG/g;
470 $doc =~ s/\{\{unmeth_CHG\}\}/$unmeth_CHG/g;
471 $doc =~ s/\{\{unmeth_CHH\}\}/$unmeth_CHH/g;
472
473 $doc =~ s/\{\{perc_CpG\}\}/$perc_CpG/g;
474 $doc =~ s/\{\{perc_CHG\}\}/$perc_CHG/g;
475 $doc =~ s/\{\{perc_CHH\}\}/$perc_CHH/g;
476
477 my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);
478
479 if ($perc_CpG eq 'N/A'){
480 $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
481 }
482 else{
483 $perc_CpG_graph = $perc_CpG;
484 }
485
486 if ($perc_CHG eq 'N/A'){
487 $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
488 }
489 else{
490 $perc_CHG_graph = $perc_CHG;
491 }
492
493 if ($perc_CHH eq 'N/A'){
494 $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
495 }
496 else{
497 $perc_CHH_graph = $perc_CHH;
498 }
499
500 if ($perc_unknown eq 'N/A'){
501 $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
502 }
503 else{
504 $perc_unknown_graph = $perc_unknown;
505 }
506
507 $doc =~ s/\{\{perc_CpG_graph\}\}/$perc_CpG_graph/g;
508 $doc =~ s/\{\{perc_CHG_graph\}\}/$perc_CHG_graph/g;
509 $doc =~ s/\{\{perc_CHH_graph\}\}/$perc_CHH_graph/g;
510
511 }
512 else{
513 warn "Am I missing something?\n\n";
514 }
515
516 warn "Complete\n\n";
517 return $doc;
518 }
519
520
521 sub read_deduplication_report{
522
523 my ($dedup_report,$doc) = @_;
524
525 warn "Processing deduplication report $dedup_report ...\n";
526 open (DEDUP,$dedup_report) or die "Couldn't read from file $dedup_report: $!\n\n";
527
528 my $total_seqs;
529 my $dups;
530 my $diff_pos;
531 my $leftover;
532
533 while (<DEDUP>){
534 chomp;
535 if ($_ =~ /^Total number of alignments/){
536 (undef,$total_seqs) = split /\t/;
537 warn "Total number of seqs >> $total_seqs <<\n" if ($verbose);
538 }
539 elsif($_ =~ /^Total number duplicated/){
540 (undef,$dups) = split /\t/;
541 $dups =~ s/\s.*//; # just need the number, not the percentage
542 warn "Duplicated >> $dups <<\n" if ($verbose);
543 }
544 elsif($_ =~ /^Duplicated alignments were found at/){
545 (undef,$diff_pos) = split /\t/;
546 $diff_pos =~ s/\s.*//; # just need the number
547 warn "Different positions >> $diff_pos <<\n" if ($verbose);
548 }
549 elsif($_ =~ /^Total count of deduplicated leftover sequences: (\d+)/){
550 $leftover = $1;
551 warn "Leftover seqs >> $leftover <<\n" if ($verbose);
552 }
553 }
554
555 unless (defined $leftover){
556 if (defined $dups and defined $total_seqs){
557 $leftover = $total_seqs - $dups;
558 }
559 }
560
561 # Checking if we got all we need
562 if (defined $dups and defined $total_seqs and defined $diff_pos and defined $leftover){
563 # warn "Got all I need!\n\n";
564 $doc =~ s/\{\{seqs_total_duplicates\}\}/$total_seqs/g;
565 $doc =~ s/\{\{unique_alignments_duplicates\}\}/$leftover/g;
566 $doc =~ s/\{\{duplicate_alignments_duplicates\}\}/$dups/g;
567 $doc =~ s/\{\{different_positions_duplicates\}\}/$diff_pos/g;
568 }
569 else{
570 warn "Something went wrong... Use --verbose to get a clue...\n";
571 # skipping this plot entirely if values could not be extracted
572 return $doc;
573 }
574 warn "Complete\n\n";
575 return $doc;
576 }
577
578
579 sub read_nucleotide_coverage_report{
580
581 my ($nuc_report,$doc) = @_;
582
583 warn "Processing nucleotide coverage report '$nuc_report' ...\n";
584 open (NUC,$nuc_report) or die "Couldn't read from file $nuc_report: $!\n\n";
585
586 my %nucs; # storing nucleotides and frequencies
587 my $linecount = 0;
588
589 while (<NUC>){
590 chomp;
591 $_ =~ s/\r//; # removing carriage returns
592 # warn "$_\n"; sleep(1);
593 my ($element,$count_obs,$observed,$count_exp,$expected,$coverage) = (split /\t/);
594 # warn "$element , $count_obs , $observed , $count_exp , $expected, $coverage\n"; sleep(1);
595 if ($linecount == 0){ # verifying that the data appears to be a Bismark nucleotide coverage report
596 if ($observed eq 'percent sample'){
597 # warn "Fine, found '$observed'\n";
598 }
599 else{
600 die "Expected to find 'percent sample' as entry in line 1, column 3 but found '$observed'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
601 }
602
603 if ($expected eq 'percent genomic'){
604 # warn "Fine, found '$expected'\n";
605 }
606 else{
607 die "Expected to find 'percent genomic' as entry in line 1, column 5 but found '$expected'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
608 }
609 }
610 else{
611 $nucs{$element}->{obs}->{percent} = $observed;
612 $nucs{$element}->{exp}->{percent} = $expected;
613 $nucs{$element}->{obs}->{counts} = $count_obs;
614 $nucs{$element}->{exp}->{counts} = $count_exp;
615 $nucs{$element}->{obs}->{coverage} = $coverage; # coverage of that nucleotide in the sample
616 warn "Element '$element' observed: $observed\n" if $verbose;
617 warn "Element '$element' expected: $expected\n" if $verbose;
618 }
619
620 ++$linecount;
621
622 }
623
624 # Checking if we got all we need
625 my $looksOK = 1;
626 foreach my $key (keys %nucs){
627 unless ( (defined $nucs{$key}->{obs}) and (defined $nucs{$key}->{exp})){
628 $looksOK = 0;
629 }
630 }
631
632 if ($looksOK){
633 warn "Got all necessary information, editing HTML report ...\n" if $verbose;
634 my $minmax = 0;
635 foreach my $key (sort {$a cmp $b} keys %nucs){
636 my $nuc_obs = $nucs{$key}->{obs}->{percent};
637 my $nuc_exp = $nucs{$key}->{exp}->{percent};
638 my $counts_obs = $nucs{$key}->{obs}->{counts};
639 my $counts_exp = $nucs{$key}->{exp}->{counts};
640 my $cov = $nucs{$key}->{obs}->{coverage};
641
642 # calculating log2 observed/expected
643 my $ratio = $nuc_obs/$nuc_exp;
644 # my $logratio = sprintf ("%.2f",log($ratio)/log(2));
645 # if (abs($logratio) > $minmax){
646 # $minmax = abs($logratio);
647 # }
648 warn "$key\tnuc_${key}_obs\t$nuc_obs\tnuc_${key}_exp\t$nuc_exp\tratio: $ratio\n" if $verbose;
649
650 $doc =~ s/\{\{nuc_${key}_p_obs\}\}/$nuc_obs/g;
651 $doc =~ s/\{\{nuc_${key}_p_exp\}\}/$nuc_exp/g;
652 $doc =~ s/\{\{nuc_${key}_counts_obs\}\}/$counts_obs/g;
653 $doc =~ s/\{\{nuc_${key}_counts_exp\}\}/$counts_exp/g;
654 $doc =~ s/\{\{nuc_${key}_coverage\}\}/$cov/g;
655 }
656 # warn "Minimum/maxium ratio was: $minmax\n" if $verbose;
657 # $doc =~ s/\{\{nuc_minmax\}\}/$minmax/g;
658 }
659 else{
660 warn "Something went wrong, skipping this plot entirely... Use --verbose to get a clue...\n";
661 # skipping this plot entirely if values could not be extracted
662 return $doc;
663 }
664
665 warn "Complete\n\n";
666 return $doc;
667 }
668
669
670 sub read_splitting_report{
671
672 my ($splitting_report,$doc) = @_;
673
674 warn "Processing splitting report $splitting_report ...\n";
675 open (SPLIT,$splitting_report) or die "Couldn't read from file $splitting_report: $!\n\n";
676
677 my $total_seqs;
678
679 my $total_C_count;
680 my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
681 my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
682 my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);
683
684 while (<SPLIT>){
685 chomp;
686
687 ### Context Methylation
688 if($_ =~ /^Total number of C/ ){
689 (undef,$total_C_count) = split /\t/;
690 print "total calls >> $total_C_count <<\n" if ($verbose);
691 }
692
693 elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
694 (undef,$meth_CpG) = split /\t/;
695 print "meth CpG >> $meth_CpG <<\n" if ($verbose);
696 }
697 elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
698 (undef,$meth_CHG) = split /\t/;
699 print "meth CHG>> $meth_CHG <<\n" if ($verbose);
700 }
701 elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
702 (undef,$meth_CHH) = split /\t/;
703 print "meth CHH >> $meth_CHH <<\n" if ($verbose);
704 }
705 elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
706 (undef,$meth_unknown) = split /\t/;
707 print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
708 }
709
710 elsif($_ =~ /^Total C to T conversions in CpG context:/ ){
711 (undef,$unmeth_CpG) = split /\t/;
712 print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
713 }
714 elsif($_ =~ /^Total C to T conversions in CHG context:/ ){
715 (undef,$unmeth_CHG) = split /\t/;
716 print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
717 }
718 elsif($_ =~ /^Total C to T conversions in CHH context:/ ){
719 (undef,$unmeth_CHH) = split /\t/;
720 print "unmeth CHH >> $unmeth_CHH <<\n" if ($verbose);
721 }
722 elsif($_ =~ /^Total C to T conversions in Unknown context:/ ){
723 (undef,$unmeth_unknown) = split /\t/;
724 print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
725 }
726
727 elsif($_ =~ /^C methylated in CpG context:/ ){
728 (undef,$perc_CpG) = split /\t/;
729 $perc_CpG =~ s/%//;
730 print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
731 }
732 elsif($_ =~ /^C methylated in CHG context:/ ){
733 (undef,$perc_CHG) = split /\t/;
734 $perc_CHG =~ s/%//;
735 print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
736 }
737 elsif($_ =~ /^C methylated in CHH context:/ ){
738 (undef,$perc_CHH) = split /\t/;
739 $perc_CHH =~ s/%//;
740 print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
741 }
742 elsif($_ =~ /^C methylated in Unknown context:/ ){
743 (undef,$perc_unknown) = split /\t/;
744 $perc_unknown =~ s/%//;
745 print "percentage unknown >> $perc_unknown <<\n" if ($verbose);
746 }
747 }
748
749 if (defined $meth_CpG and defined $meth_CHG and defined $meth_CHH and defined $unmeth_CpG and defined $unmeth_CHG and defined $unmeth_CHH){
750 warn "Got all necessary information, editing HTML report ...\n" if ($verbose);
751
752 ### Context Methylation
753 $doc =~ s/\{\{total_C_count_splitting\}\}/$total_C_count/g;
754
755 $doc =~ s/\{\{meth_CpG_splitting\}\}/$meth_CpG/g;
756 $doc =~ s/\{\{meth_CHG_splitting\}\}/$meth_CHG/g;
757 $doc =~ s/\{\{meth_CHH_splitting\}\}/$meth_CHH/g;
758
759 $doc =~ s/\{\{unmeth_CpG_splitting\}\}/$unmeth_CpG/g;
760 $doc =~ s/\{\{unmeth_CHG_splitting\}\}/$unmeth_CHG/g;
761 $doc =~ s/\{\{unmeth_CHH_splitting\}\}/$unmeth_CHH/g;
762
763 unless (defined $perc_CpG){
764 $perc_CpG = 'N/A';
765 }
766 unless (defined $perc_CHG){
767 $perc_CHG = 'N/A';
768 }
769 unless (defined $perc_CHH){
770 $perc_CHH = 'N/A';
771 }
772 unless (defined $perc_unknown){
773 $perc_unknown = 'N/A';
774 }
775
776 ### Unknown sequence context, just for Bowtie 2 alignments
777 my $meth_unknown_inject;
778 my $unmeth_unknown_inject;
779 my $perc_unknown_inject;
780
781 if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
782 $meth_unknown_inject = " <tr>
783 <th>Methylated C's in Unknown context</th>
784 <td>$meth_unknown</td>
785 </tr>";
786 $unmeth_unknown_inject = " <tr>
787 <th>Unmethylated C's in Unknown context</th>
788 <td>$unmeth_unknown</td>
789 </tr>";
790 $perc_unknown_inject = " <tr>
791 <th>Methylated C's in Unknown context</th>
792 <td>$perc_unknown%</td>
793 </tr>";
794 }
795 else{
796 $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
797 }
798
799 ### injecting this into the table
800 $doc =~ s/\{\{meth_unknown_splitting\}\}/$meth_unknown_inject/g;
801 $doc =~ s/\{\{unmeth_unknown_splitting\}\}/$unmeth_unknown_inject/g;
802 $doc =~ s/\{\{perc_unknown_splitting\}\}/$perc_unknown_inject/g;
803
804 # for the graph we need to take care that there are no N/A values in the percentage fields
805 my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);
806
807 if ($perc_CpG eq 'N/A'){
808 $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
809 }
810 else{
811 $perc_CpG_graph = $perc_CpG;
812 }
813
814 if ($perc_CHG eq 'N/A'){
815 $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
816 }
817 else{
818 $perc_CHG_graph = $perc_CHG;
819 }
820
821 if ($perc_CHH eq 'N/A'){
822 $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
823 }
824 else{
825 $perc_CHH_graph = $perc_CHH;
826 }
827
828 if ($perc_unknown eq 'N/A'){
829 $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
830 }
831 else{
832 $perc_unknown_graph = $perc_unknown;
833 }
834
835 $doc =~ s/\{\{perc_CpG_graph_splitting\}\}/$perc_CpG_graph/g;
836 $doc =~ s/\{\{perc_CHG_graph_splitting\}\}/$perc_CHG_graph/g;
837 $doc =~ s/\{\{perc_CHH_graph_splitting\}\}/$perc_CHH_graph/g;
838
839 $doc =~ s/\{\{perc_CpG_splitting\}\}/$perc_CpG/g;
840 $doc =~ s/\{\{perc_CHG_splitting\}\}/$perc_CHG/g;
841 $doc =~ s/\{\{perc_CHH_splitting\}\}/$perc_CHH/g;
842 }
843 else{
844 warn "Am I missing something? Try using --verbose to get a clue...\n\n";
845 }
846 warn "Complete\n\n";
847
848 return $doc;
849
850 }
851
852
853 sub read_mbias_report{
854
855 my ($mbias_report,$doc) = @_;
856
857 warn "Processing M-bias report $mbias_report ...\n";
858 open (MBIAS,$mbias_report) or die "Couldn't read from file $mbias_report: $!\n\n";
859
860 my %mbias_1;
861 my %mbias_2;
862
863 my $context;
864 my $read_identity;
865 my $state = 'single'; # setting this to 'single' if there is no read 2
866
867 while (<MBIAS>){
868 chomp;
869 if ($_ =~ /^(C.{2}) context/){
870 $context = $1;
871
872 if ($_ =~ /R2/){
873 $read_identity = 2;
874 $state = 'paired';
875 }
876 else{
877 $read_identity = 1;
878 }
879
880 # warn "new context is: $context\n";
881 # warn "Read identity is: Read $read_identity\n";
882 }
883 if ($_ =~ /^\d/){
884 my ($pos,$meth,$unmeth,$perc,$coverage) = (split /\t/);
885 if ($read_identity == 1){
886 push @{$mbias_1{$context}->{coverage}}, "[$pos, $coverage]";
887 push @{$mbias_1{$context}->{perc}}, "[$pos, $perc]";
888 }
889 elsif ($read_identity == 2){
890 push @{$mbias_2{$context}->{coverage}}, "[$pos, $coverage]";
891 push @{$mbias_2{$context}->{perc}}, "[$pos, $perc]";
892 }
893 else{
894 warn "read identity was unknown : '$read_identity'\n\n";
895 }
896
897 # print join (" ",$pos,$meth,$unmeth,$perc,$coverage)."\n";
898 }
899 }
900
901 # Read 1 M-bias
902 my $r1_CpG_coverage = join (',',@{$mbias_1{'CpG'}->{coverage}});
903 my $r1_CpG_perc = join (',',@{$mbias_1{'CpG'}->{perc}});
904
905 my $r1_CHG_coverage = join (',',@{$mbias_1{'CHG'}->{coverage}});
906 my $r1_CHG_perc = join (',',@{$mbias_1{'CHG'}->{perc}});
907
908 my $r1_CHH_coverage = join (',',@{$mbias_1{'CHH'}->{coverage}});
909 my $r1_CHH_perc = join (',',@{$mbias_1{'CHH'}->{perc}});
910
911 $doc =~ s/\{\{CpG_total_calls_R1\}\}/$r1_CpG_coverage/g;
912 $doc =~ s/\{\{CHG_total_calls_R1\}\}/$r1_CHG_coverage/g;
913 $doc =~ s/\{\{CHH_total_calls_R1\}\}/$r1_CHH_coverage/g;
914
915 $doc =~ s/\{\{CpG_methylation_R1\}\}/$r1_CpG_perc/g;
916 $doc =~ s/\{\{CHG_methylation_R1\}\}/$r1_CHG_perc/g;
917 $doc =~ s/\{\{CHH_methylation_R1\}\}/$r1_CHH_perc/g;
918
919 # Read 2 M-bias
920 if (%mbias_2){
921 my $r2_CpG_coverage = join (',',@{$mbias_2{'CpG'}->{coverage}});
922 my $r2_CpG_perc = join (',',@{$mbias_2{'CpG'}->{perc}});
923
924 my $r2_CHG_coverage = join (',',@{$mbias_2{'CHG'}->{coverage}});
925 my $r2_CHG_perc = join (',',@{$mbias_2{'CHG'}->{perc}});
926
927 my $r2_CHH_coverage = join (',',@{$mbias_2{'CHH'}->{coverage}});
928 my $r2_CHH_perc = join (',',@{$mbias_2{'CHH'}->{perc}});
929
930 $doc =~ s/\{\{CpG_total_calls_R2\}\}/$r2_CpG_coverage/g;
931 $doc =~ s/\{\{CHG_total_calls_R2\}\}/$r2_CHG_coverage/g;
932 $doc =~ s/\{\{CHH_total_calls_R2\}\}/$r2_CHH_coverage/g;
933
934 $doc =~ s/\{\{CpG_methylation_R2\}\}/$r2_CpG_perc/g;
935 $doc =~ s/\{\{CHG_methylation_R2\}\}/$r2_CHG_perc/g;
936 $doc =~ s/\{\{CHH_methylation_R2\}\}/$r2_CHH_perc/g;
937 }
938 warn "Complete\n\n";
939
940 return ($state,$doc);
941
942 }
943
944
945 sub read_report_template{
946 my $doc;
947 warn "Attempting to open file from: $Bin/bismark_sitrep.tpl\n\n" if ($verbose);
948 open (DOC,"$Bin/bismark_sitrep.tpl") or die $!;
949 while(<DOC>){
950 chomp;
951 $_ =~ s/\r//g;
952 $doc .= $_."\n";
953 }
954
955 close DOC or die $!;
956 return $doc;
957 }
958
959
960
961 sub process_commandline{
962 my $help;
963 my $output_dir;
964 my $manual_output_file;
965 my $alignment_report;
966 my $dedup_report;
967 my $splitting_report;
968 my $mbias_report;
969 my $nucleotide_coverage_report; # stores nucleotide coverage statistics
970 my $verbose;
971
972 my $version;
973
974 my $command_line = GetOptions ('help|man' => \$help,
975 'dir=s' => \$output_dir,
976 'o|output=s' => \$manual_output_file,
977 'alignment_report=s' => \$alignment_report,
978 'dedup_report=s' => \$dedup_report,
979 'splitting_report=s' => \$splitting_report,
980 'mbias_report=s' => \$mbias_report,
981 'nucleotide_report=s' => \$nucleotide_coverage_report,
982 'version' => \$version,
983 'verbose' => \$verbose,
984 );
985
986 ### EXIT ON ERROR if there were errors with any of the supplied options
987 unless ($command_line){
988 die "Please respecify command line options\n";
989 }
990
991 ### HELPFILE
992 if ($help){
993 print_helpfile();
994 exit;
995 }
996
997 if ($version){
998 print << "VERSION";
999
1000
1001 Bismark HTML Report Module
1002
1003 bismark2report version: $bismark2report_version
1004 Copyright 2010-16 Felix Krueger
1005 Babraham Bioinformatics
1006 www.bioinformatics.babraham.ac.uk/projects/bismark/
1007
1008
1009 VERSION
1010 exit;
1011 }
1012
1013 ### OUTPUT DIR PATH
1014 if (defined $output_dir){
1015 unless ($output_dir eq ''){ # if the output dir has been passed on by the methylation extractor and is an empty string we don't want to change it
1016 unless ($output_dir =~ /\/$/){
1017 $output_dir =~ s/$/\//;
1018 }
1019 }
1020 }
1021 else{
1022 $output_dir = '';
1023 }
1024
1025
1026 ## First we are looking for alignment reports, and then look whether there are any optional plots with the same base name
1027
1028 if ($alignment_report){
1029 ### we only process the one alignment report (and possibly the other ones as well) that was specified
1030 push @alignment_reports, $alignment_report;
1031 }
1032 else{ ### no alignment report specified, looking in the current directory for files ending in *E_report.txt (SE or PE)
1033
1034 ### looking in the current directory for report files. Less than 1 report file is not allowed
1035 @alignment_reports = <*E_report.txt>;
1036
1037 if (scalar @alignment_reports == 0){
1038 warn "Found no potential alignment reports in the current directory. Please specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
1039 print_helpfile();
1040 exit;
1041 }
1042 else{
1043 # there are Bismark alignment report(s) in the directory
1044 warn "Found ",scalar @alignment_reports," alignment reports in current directory. Now trying to figure out whether there are corresponding optional reports\n";
1045 }
1046 }
1047
1048 ### Ensuring that there isn't more than 1 file in @alignment_reports if someone manually specified an output file.
1049 if (scalar @alignment_reports > 1){
1050 if (defined $manual_output_file){
1051 die "You cannot run bismark2report on more than 1 file while specifying a single output file. Either lose the option -o to derive the output filenames automatically, or specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
1052 }
1053 }
1054
1055 foreach my $aln (@alignment_reports){
1056
1057 # warn "Figuring things out for:\n$aln\n";
1058 $aln =~ /^(.+)_(P|S)E_report.txt$/;
1059 my $basename = $1;
1060 # warn "using this basename:\n$basename\n\n";
1061
1062 ### DEDUPLICATION REPORT (optional)
1063 if ($dedup_report){
1064 warn "User specifified dedup report: $dedup_report\n";sleep(3);
1065 if (lc$dedup_report eq 'none'){
1066 push @dedup_reports, ''; # user may wish to skip this step by specifying 'none'
1067 }
1068 else{
1069 push @dedup_reports, $dedup_report;
1070 }
1071 }
1072 else{
1073 ### looking in the current directory for a report file with the same base name
1074 my @dedup_report_files = <$basename*deduplication_report.txt>;
1075 # warn "Number of deduplication reports found in current directory for basename $1: ", scalar @dedup_report_files , "\n";
1076
1077 if (scalar @dedup_report_files > 1){
1078 die "Found ", scalar @dedup_report_files ," potential deduplication reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--dedup_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
1079 }
1080 elsif (scalar @dedup_report_files == 0){
1081 push @dedup_reports, ''; # no corresponding deduplication report found
1082 }
1083 else{
1084 # there is only a single deduplication report in the current directory, using this one
1085 $dedup_report = shift @dedup_report_files;
1086 push @dedup_reports, $dedup_report;
1087 # warn "Going to use this dedup report: $dedup_report\n\n";
1088 }
1089 }
1090
1091 ### NUCLEOTIDE COVERAGE REPORT (optional)
1092 if (defined $nucleotide_coverage_report){
1093 # warn "User specified nucleotide coverage report: $nucleotide_coverage_report\n";sleep(1);
1094 if (lc$nucleotide_coverage_report eq 'none'){
1095 push @nuc_reports, ''; # user may wish to skip this step by specifying 'none'
1096 }
1097 else{
1098 push @nuc_reports, $nucleotide_coverage_report;
1099 }
1100 }
1101 else{
1102 ### looking in the current directory for a report file with the same base name
1103 my @nucleotide_coverage_report_files = <$basename*nucleotide_stats.txt>;
1104 # warn "Number of nucleotide coverage statistic reports found in current directory for basename $1: ", scalar @nucleotide_coverage_report_files , "\n";
1105
1106 if (scalar @nucleotide_coverage_report_files > 1){
1107 die "Found ", scalar @nucleotide_coverage_report_files ," potential nucleotide coverage reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--nucleotide_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
1108 }
1109 elsif (scalar @nucleotide_coverage_report_files == 0){
1110 # warn "Found no corresponding nucleotide coverage file\n";
1111 push @nuc_reports, ''; # no corresponding nucleotide coverge file report found
1112 }
1113 else{
1114 # there is only a single nucleotide coverage report in the current directory, using this one
1115 $nucleotide_coverage_report = shift @nucleotide_coverage_report_files;
1116 push @nuc_reports, $nucleotide_coverage_report;
1117 # warn "Going to use this nucleotide coverage report: $nucleotide_coverage_report\n\n"; sleep(3);
1118 }
1119 }
1120
1121
1122 ### METHYLATION EXTRACTOR SPLITTING REPORT
1123 if ($splitting_report){
1124 if (lc$splitting_report eq 'none'){
1125 push @splitting_reports, ''; # user may wish to skip this step by specifying 'none'
1126 }
1127 else{
1128 push @splitting_reports, $splitting_report;
1129 }
1130 }
1131 else{
1132 ### looking in the current directory for a report file with the same basename
1133 my @splitting_report_files = <$basename*splitting_report.txt>;
1134 # warn "Number of splitting reports found in current directory: ", scalar @splitting_report_files , "\n";
1135
1136 if (scalar @splitting_report_files > 1){
1137 die "Found ", scalar @splitting_report_files ," potential methylation extractor splitting reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--splitting_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
1138 }
1139 elsif (scalar @splitting_report_files == 0){
1140 push @splitting_reports, ''; # no corresponding methylation extractor report found
1141 }
1142 else{
1143 # there is only a single splitting report in the current directory, using this one
1144 $splitting_report = shift @splitting_report_files;
1145 push @splitting_reports, $splitting_report;
1146 }
1147 }
1148
1149 ### M-BIAS REPORT
1150 if ($mbias_report){
1151 if (lc$mbias_report eq 'none'){
1152 $mbias_report = ''; # user may wish to skip this step by specifying 'none'
1153 push @mbias_reports, $mbias_report;
1154 }
1155 else{
1156 push @mbias_reports, $mbias_report;
1157 }
1158 }
1159 else{
1160 ### looking in the current directory for a single report file. More (or less) than 1 report file is not allowed
1161 my @mbias_report_files = <$basename*M-bias.txt>;
1162
1163 # warn "Number of M-bias reports found in current directory: ", scalar @mbias_report_files , "\n";
1164
1165 if (scalar @mbias_report_files > 1){
1166 die "Found ", scalar @mbias_report_files ," potential M-bias reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--mbias_report FILE'or otherwise provide filenames that are easier to figure out automatically ...\n\n";
1167 }
1168 elsif (scalar @mbias_report_files == 0){
1169 push @mbias_reports, '';
1170 }
1171 else{
1172 # there is only a single M-bias report in the current directory, using this one
1173 $mbias_report = shift @mbias_report_files;
1174 push @mbias_reports, $mbias_report;
1175 }
1176 }
1177 $dedup_report = $splitting_report = $mbias_report = $nucleotide_coverage_report = undef;
1178 }
1179
1180 return ($output_dir,$verbose,$manual_output_file);
1181
1182 }
1183
1184 sub print_helpfile{
1185 print <<EOF
1186
1187 SYNOPSIS:
1188
1189 This script uses a Bismark alignment report to generate a graphical HTML report page. Optionally, further reports of
1190 the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. If several
1191 Bismark reports are found in the same folder, a separate report will be generated for each of these, whereby the output filename
1192 will be derived from the Bismark alignment report file. Bismark2report attempts to find optional reports automatically based
1193 on the file basename.
1194
1195
1196 USAGE: bismark2report [options]
1197
1198
1199 -o/--output <filename> Name of the output file (optional). If not specified explicitly, the output filename will be derived
1200 from the Bismark alignment report file. Specifying an output filename only works if the HTML report is
1201 to be generated for a single Bismark alignment report (and potentially additional reports).
1202
1203 --dir Output directory. Output is written to the current directory if not specified explicitly.
1204
1205
1206 --alignment_report FILE If not specified explicitly, bismark2report attempts to find Bismark report file(s) in the current
1207 directory and produces a separate HTML report for each mapping report file. Based on the basename of
1208 the Bismark mapping report, bismark2report will also attempt to find the other Bismark reports (see below)
1209 for inclusion into the HTML report. Specifying a Bismark alignment report file is mandatory.
1210
1211 --dedup_report FILE If not specified explicitly, bismark2report attempts to find a deduplication report file with the same
1212 basename as the Bismark mapping report (generated by deduplicate_bismark) in the
1213 current working directory. Including a deduplication report is optional, and using the FILE 'none'
1214 will skip this step entirely.
1215
1216 --splitting_report FILE If not specified explicitly, bismark2report attempts to find a splitting report file with the same
1217 basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
1218 working directory. Including a splitting report is optional, and using the FILE 'none' will skip this
1219 step entirely.
1220
1221 --mbias_report FILE If not specified explicitly, bismark2report attempts to find a single M-bias report file with the same
1222 basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
1223 working directory. Including an M-Bias report is optional, and using the FILE 'none' will skip this step
1224 entirely.
1225
1226 --nucleotide_report FILE If not specified explicitly, bismark2report attempts to find a single nucleotide coverage report file
1227 with the same basename as the Bismark mapping report (generated by Bismark with the option
1228 '--nucleotide_coverage') in the current working directory. Including a nucleotide coverage statistics
1229 report is optional, and using the FILE 'none' will skip this report entirely.
1230
1231 Script last modified: 13 May 2016
1232
1233 EOF
1234 ;
1235 exit 1;
1236 }
1237