Mercurial > repos > bgruening > bismark
comparison bismark_pretty_report/bismark2report @ 7:fcadce4d9a06 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
author | bgruening |
---|---|
date | Sat, 06 May 2017 13:18:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:0f8646f22b8d | 7:fcadce4d9a06 |
---|---|
1 #!/usr/bin/env perl | |
2 use warnings; | |
3 use strict; | |
4 use Getopt::Long; | |
5 use FindBin qw($Bin); | |
6 use lib "$Bin/../lib"; | |
7 | |
8 ## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk) | |
9 | |
10 ## This program is free software: you can redistribute it and/or modify | |
11 ## it under the terms of the GNU General Public License as published by | |
12 ## the Free Software Foundation, either version 3 of the License, or | |
13 ## (at your option) any later version. | |
14 | |
15 ## This program is distributed in the hope that it will be useful, | |
16 ## but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 ## GNU General Public License for more details. | |
19 | |
20 ## You should have received a copy of the GNU General Public License | |
21 ## along with this program. If not, see <http://www.gnu.org/licenses/>. | |
22 | |
23 my $bismark2report_version = 'v0.16.3'; | |
24 my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports); | |
25 | |
26 my ($output_dir,$verbose,$manual_output_file) = process_commandline(); | |
27 | |
28 # print join (",",@alignment_reports)."\n"; | |
29 # print join (",",@dedup_reports)."\n"; | |
30 # print join (",",@splitting_reports)."\n"; | |
31 # print join (",",@mbias_reports)."\n"; | |
32 # print join (",",@nuc_reports)."\n"; | |
33 | |
34 while (@alignment_reports){ | |
35 | |
36 my $alignment_report = shift @alignment_reports; | |
37 my $dedup_report = shift @dedup_reports; | |
38 my $splitting_report = shift @splitting_reports; | |
39 my $mbias_report = shift @mbias_reports; | |
40 my $nuc_report = shift @nuc_reports; | |
41 | |
42 ### HTML OUTPUT FILE | |
43 my $report_output = $alignment_report; | |
44 $report_output =~ s/^.*\///; # deleting optional path information | |
45 $report_output =~ s/\.txt$//; | |
46 $report_output =~ s/$/.html/; | |
47 | |
48 # if -o output_file was specified we are going to use that name preferentially. This may only happen if there is a single report in the folder, or if a single report has been specified manually | |
49 if ($manual_output_file){ | |
50 warn "A specific output filename was specified: $manual_output_file. Using that one instead of deriving the filename\n"; sleep(1); | |
51 $report_output = $manual_output_file; | |
52 } | |
53 | |
54 $report_output = $output_dir.$report_output; | |
55 warn "\nWriting Bismark HTML report to >> $report_output <<\n\n"; | |
56 | |
57 my $doc = read_report_template(); #reading and storing the entire report template | |
58 | |
59 # BISMARK ALIGNMENT REPORT (mandatory) | |
60 warn "="x110,"\n"; | |
61 warn "Using the following alignment report:\t\t> $alignment_report <\n"; | |
62 # DEDUPLICATION REPORT (optional) | |
63 if ($dedup_report){ | |
64 warn "Using the following deduplication report:\t> $dedup_report <\n"; | |
65 } | |
66 else{ | |
67 warn "No deduplication report file specified, skipping this step\n"; | |
68 } | |
69 | |
70 # SPLITTING REPORT (optional) | |
71 if ($splitting_report){ | |
72 warn "Using the following splitting report:\t\t> $splitting_report <\n"; | |
73 } | |
74 else{ | |
75 warn "No splitting report file specified, skipping this step\n"; | |
76 } | |
77 | |
78 # M-BIAS REPORT (optional) | |
79 if ($mbias_report){ | |
80 warn "Using the following M-bias report:\t\t> $mbias_report <\n"; | |
81 } | |
82 else{ | |
83 warn "No M-bias report file specified, skipping this step\n"; | |
84 } | |
85 | |
86 # NUCLEOTIDE COVERAGE REPORT (optional) | |
87 if ($nuc_report){ | |
88 warn "Using the following nucleotide coverage report:\t> $nuc_report <\n"; | |
89 } | |
90 else{ | |
91 warn "No nucleotide coverage report file specified, skipping this step\n"; | |
92 } | |
93 warn "="x110,"\n\n\n"; | |
94 $verbose and sleep(3); | |
95 | |
96 # creating timestamp | |
97 $doc = getLoggingTime($doc); | |
98 | |
99 $doc = read_alignment_report($alignment_report,$doc); # mandatory | |
100 | |
101 if ($dedup_report){ # optional | |
102 $doc = read_deduplication_report($dedup_report,$doc); | |
103 | |
104 # removing the delete tags in the html template | |
105 $doc =~ s/\{\{start_deletion_duplication\}\}//g; | |
106 $doc =~ s/\{\{end_deletion_duplication\}\}//g; | |
107 } | |
108 else{ | |
109 # removing the entire graph and table section for the deduplication part | |
110 warn "Removing the duplication section from the HTML report\n" if ($verbose); | |
111 $doc =~ s/(\{\{start_deletion_duplication.*?end_deletion_duplication\}\})//gs; # s includes newline chars; non-greedy | |
112 warn "Deleting the following content:\n$1\n\n" if ($verbose); | |
113 sleep(3) if ($verbose); | |
114 } | |
115 | |
116 if ($nuc_report){ # optional | |
117 $doc = read_nucleotide_coverage_report($nuc_report,$doc); | |
118 | |
119 # removing the delete tags in the html template | |
120 $doc =~ s/\{\{start_deletion_nucleotide_coverage\}\}//g; | |
121 $doc =~ s/\{\{end_deletion_nucleotide_coverage\}\}//g; | |
122 } | |
123 else{ | |
124 # removing the entire graph and table section for the nucleotide coverage part | |
125 warn "Removing the nucleotide coverage section from the HTML report\n" if ($verbose); | |
126 $doc =~ s/(\{\{start_deletion_nucleotide_coverage.*?end_deletion_nucleotide_coverage\}\})//gs; # s includes newline chars; non-greedy | |
127 warn "Deleting the following content:\n$1\n\n" if ($verbose); | |
128 sleep(3) if ($verbose); | |
129 } | |
130 | |
131 if ($splitting_report){ # optional | |
132 $doc = read_splitting_report($splitting_report,$doc); | |
133 | |
134 # removing the delete tags in the html template | |
135 $doc =~ s/\{\{start_deletion_splitting\}\}//g; | |
136 $doc =~ s/\{\{end_deletion_splitting\}\}//g; | |
137 } | |
138 else{ | |
139 # removing the entire graph and table section for the deduplication part | |
140 warn "Removing the splitting report section from the HTML report\n" if ($verbose); | |
141 $doc =~ s/(\{\{start_deletion_splitting.*?end_deletion_splitting\}\})//gs; # s includes newline chars; non-greedy | |
142 warn "Deleting the following content:\n$1\n\n" if ($verbose); | |
143 sleep(3) if ($verbose); | |
144 } | |
145 | |
146 if ($mbias_report){ # optional | |
147 (my $state, $doc) = read_mbias_report($mbias_report,$doc); | |
148 | |
149 # removing the delete tags in the html template | |
150 $doc =~ s/\{\{start_deletion_mbias\}\}//g; | |
151 $doc =~ s/\{\{end_deletion_mbias\}\}//g; | |
152 | |
153 warn "Reported read state: $state\n\n" if ($verbose); | |
154 # if the report was single-end we need to remove the second plot only | |
155 if ($state eq 'single'){ | |
156 $doc =~ s/(\{\{start_deletion_mbias_2.*?end_deletion_mbias_2\}\})//gs; # s includes newline chars; non-greedy | |
157 warn "Deleting the following content:\n$1\n\n" if ($verbose); | |
158 } | |
159 else{ | |
160 $doc =~ s/\{\{start_deletion_mbias_2\}\}//g; | |
161 $doc =~ s/\{\{end_deletion_mbias_2\}\}//g; | |
162 } | |
163 } | |
164 else{ | |
165 # removing the entire graph and table section for the deduplication part | |
166 $doc =~ s/(\{\{start_deletion_mbias.*?end_deletion_mbias\}\})//gs; # s includes newline chars; non-greedy | |
167 warn "Deleting the following content:\n$1\n\n" if ($verbose); | |
168 sleep(3) if ($verbose); | |
169 } | |
170 | |
171 write_out_report($report_output,$doc); | |
172 | |
173 } | |
174 | |
175 sub write_out_report{ | |
176 my ($report_output,$doc) = @_; | |
177 open (OUT,'>',$report_output) or die "Failed to write to output file $report_output: $!\n\n"; | |
178 print OUT $doc; | |
179 } | |
180 | |
181 sub getLoggingTime { | |
182 my $doc = shift; | |
183 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time); | |
184 | |
185 my $time = sprintf ("%02d:%02d", $hour,$min,$sec); | |
186 my $date = sprintf ("%04d-%02d-%02d", $year+1900,$mon+1,$mday); | |
187 warn "Using Time: $time, and date: $date\n\n" if ($verbose); | |
188 | |
189 $doc =~ s/\{\{date\}\}/$date/g; | |
190 $doc =~ s/\{\{time\}\}/$time/g; | |
191 | |
192 return $doc; | |
193 } | |
194 | |
195 | |
196 sub read_alignment_report{ | |
197 | |
198 my ($alignment_report,$doc) = @_; | |
199 | |
200 warn "Processing alignment report $alignment_report ...\n"; | |
201 open (ALN,$alignment_report) or die "Couldn't read from file $alignment_report: $!\n\n"; | |
202 | |
203 my $unique; | |
204 my $no_aln; | |
205 my $multiple; | |
206 my $no_genomic; | |
207 my $total_seqs; | |
208 my $bismark_version; | |
209 my $input_filename; | |
210 | |
211 my $unique_text; | |
212 my $no_aln_text; | |
213 my $multiple_text; | |
214 my $total_seq_text; | |
215 | |
216 my $total_C_count; | |
217 my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown); | |
218 my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown); | |
219 my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown); | |
220 | |
221 my $number_OT; | |
222 my $number_CTOT; | |
223 my $number_CTOB; | |
224 my $number_OB; | |
225 | |
226 while (<ALN>){ | |
227 chomp; | |
228 | |
229 ### General Alignment stats | |
230 if ($_ =~ /^Sequence pairs analysed in total:/ ){ ## Paired-end | |
231 (undef,$total_seqs) = split /\t/; | |
232 print "Total paired seqs: >> $total_seqs <<\n" if ($verbose); | |
233 $total_seq_text = 'Sequence pairs analysed in total'; | |
234 } | |
235 elsif ($_ =~ /^Sequences analysed in total:/ ){ ## Single-end | |
236 (undef,$total_seqs) = split /\t/; | |
237 $total_seq_text = 'Sequences analysed in total'; | |
238 print "total single-end seqs >> $total_seqs <<\n" if ($verbose); | |
239 } | |
240 | |
241 elsif($_ =~ /^Bismark report for: (.*) \(version: (.*)\)/){ | |
242 $input_filename = $1; | |
243 $bismark_version = $2; | |
244 print "Input filename(s) >> $input_filename <<\n" if ($verbose); | |
245 print "Bismark version >> $bismark_version <<\n" if ($verbose); | |
246 } | |
247 | |
248 elsif($_ =~ /^Number of paired-end alignments with a unique best hit:/){ ## Paired-end | |
249 (undef,$unique) = split /\t/; | |
250 print "Unique PE>> $unique <<\n" if ($verbose);; | |
251 $unique_text = 'Paired-end alignments with a unique best hit'; | |
252 } | |
253 elsif($_ =~ /^Number of alignments with a unique best hit from/){ ## Single-end | |
254 (undef,$unique) = split /\t/; | |
255 print "Unique SE>> $unique <<\n" if ($verbose); | |
256 $unique_text = 'Single-end alignments with a unique best hit'; | |
257 } | |
258 | |
259 elsif($_ =~ /^Sequence pairs with no alignments under any condition:/){ ## Paired-end | |
260 (undef,$no_aln) = split /\t/; | |
261 print "No alignment PE >> $no_aln <<\n" if ($verbose); | |
262 $no_aln_text = 'Pairs without alignments under any condition'; | |
263 } | |
264 elsif($_ =~ /^Sequences with no alignments under any condition:/){ ## Single-end | |
265 (undef,$no_aln) = split /\t/; | |
266 print "No alignments SE>> $no_aln <<\n" if ($verbose); | |
267 $no_aln_text = 'Sequences without alignments under any condition'; | |
268 } | |
269 | |
270 elsif($_ =~ /^Sequence pairs did not map uniquely:/){ ## Paired-end | |
271 (undef,$multiple) = split /\t/; | |
272 print "Multiple alignments PE >> $multiple <<\n" if ($verbose); | |
273 $multiple_text = 'Pairs that did not map uniquely'; | |
274 } | |
275 elsif($_ =~ /^Sequences did not map uniquely:/){ ## Single-end | |
276 (undef,$multiple) = split /\t/; | |
277 print "Multiple alignments SE >> $multiple <<\n" if ($verbose); | |
278 $multiple_text = 'Sequences that did not map uniquely'; | |
279 } | |
280 | |
281 elsif($_ =~ /^Sequence pairs which were discarded because genomic sequence could not be extracted:/){ ## Paired-end | |
282 (undef,$no_genomic) = split /\t/; | |
283 print "No genomic sequence PE >> $no_genomic <<\n" if ($verbose); | |
284 } | |
285 elsif($_ =~ /^Sequences which were discarded because genomic sequence could not be extracted:/){ ## Single-end | |
286 (undef,$no_genomic) = split /\t/; | |
287 print "No genomic sequence SE>> $no_genomic <<\n" if ($verbose); | |
288 } | |
289 | |
290 ### Context Methylation | |
291 elsif($_ =~ /^Total number of C/ ){ | |
292 (undef,$total_C_count) = split /\t/; | |
293 print "Total number C >> $total_C_count <<\n" if ($verbose); | |
294 } | |
295 | |
296 elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){ | |
297 (undef,$meth_CpG) = split /\t/; | |
298 print "meth CpG >> $meth_CpG <<\n" if ($verbose); | |
299 } | |
300 elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){ | |
301 (undef,$meth_CHG) = split /\t/; | |
302 print "meth CHG >> $meth_CHG <<\n" if ($verbose); | |
303 } | |
304 elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){ | |
305 (undef,$meth_CHH) = split /\t/; | |
306 print "meth CHH >> $meth_CHH <<\n" if ($verbose); | |
307 } | |
308 elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){ | |
309 (undef,$meth_unknown) = split /\t/; | |
310 print "meth Unknown >> $meth_unknown <<\n" if ($verbose); | |
311 } | |
312 | |
313 elsif($_ =~ /^Total unmethylated C\'s in CpG context:/ or $_ =~ /^Total C to T conversions in CpG context:/){ | |
314 (undef,$unmeth_CpG) = split /\t/; | |
315 print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose); | |
316 } | |
317 elsif($_ =~ /^Total unmethylated C\'s in CHG context:/ or $_ =~ /^Total C to T conversions in CHG context:/){ | |
318 (undef,$unmeth_CHG) = split /\t/; | |
319 print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose); | |
320 } | |
321 elsif($_ =~ /^Total unmethylated C\'s in CHH context:/ or $_ =~ /^Total C to T conversions in CHH context:/){ | |
322 (undef,$unmeth_CHH) = split /\t/; | |
323 print "unmeth CHH >> $unmeth_CHH <<\n"if ($verbose); | |
324 } | |
325 elsif($_ =~ /^Total unmethylated C\'s in Unknown context:/ or $_ =~ /^Total C to T conversions in Unknown context:/){ | |
326 (undef,$unmeth_unknown) = split /\t/; | |
327 print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose); | |
328 } | |
329 | |
330 elsif($_ =~ /^C methylated in CpG context:/ ){ | |
331 (undef,$perc_CpG) = split /\t/; | |
332 $perc_CpG =~ s/%//; | |
333 print "percentage CpG >> $perc_CpG <<\n" if ($verbose); | |
334 } | |
335 elsif($_ =~ /^C methylated in CHG context:/ ){ | |
336 (undef,$perc_CHG) = split /\t/; | |
337 $perc_CHG =~ s/%//; | |
338 print "percentage CHG >> $perc_CHG <<\n" if ($verbose); | |
339 } | |
340 elsif($_ =~ /^C methylated in CHH context:/ ){ | |
341 (undef,$perc_CHH) = split /\t/; | |
342 $perc_CHH =~ s/%//; | |
343 print "percentage CHH >> $perc_CHH <<\n" if ($verbose); | |
344 } | |
345 elsif($_ =~ /^C methylated in Unknown context:/ ){ | |
346 (undef,$perc_unknown) = split /\t/; | |
347 $perc_unknown =~ s/%//; | |
348 print "percentage Unknown >> $perc_unknown <<\n" if ($verbose); | |
349 } | |
350 | |
351 | |
352 ### Strand Origin | |
353 | |
354 elsif($_ =~ /^CT\/GA\/CT:/ ){ ## Paired-end | |
355 (undef,$number_OT) = split /\t/; | |
356 print "Number OT PE>> $number_OT <<\n" if ($verbose); | |
357 } | |
358 elsif($_ =~ /^CT\/CT:/ ){ ## Single-end | |
359 (undef,$number_OT) = split /\t/; | |
360 print "Number OT SE>> $number_OT <<\n" if ($verbose); | |
361 } | |
362 | |
363 elsif($_ =~ /^GA\/CT\/CT:/ ){ ## Paired-end | |
364 (undef,$number_CTOT) = split /\t/; | |
365 print "Number CTOT PE >> $number_CTOT <<\n" if ($verbose); | |
366 } | |
367 elsif($_ =~ /^GA\/CT:/ ){ ## Single-end | |
368 (undef,$number_CTOT) = split /\t/; | |
369 print "Number CTOT SE >> $number_CTOT <<\n" if ($verbose); | |
370 } | |
371 | |
372 elsif($_ =~ /^GA\/CT\/GA:/ ){ ## Paired-end | |
373 (undef,$number_CTOB) = split /\t/; | |
374 print "Number CTOB PE >> $number_CTOB <<\n" if ($verbose); | |
375 } | |
376 elsif($_ =~ /^GA\/GA:/ ){ ## Single-end | |
377 (undef,$number_CTOB) = split /\t/; | |
378 print "Number CTOB SE >> $number_CTOB <<\n" if ($verbose); | |
379 } | |
380 | |
381 elsif($_ =~ /^CT\/GA\/GA:/ ){ ## Paired-end | |
382 (undef,$number_OB) = split /\t/; | |
383 print "Number OB PE >> $number_OB <<\n" if ($verbose); | |
384 } | |
385 elsif($_ =~ /^CT\/GA:/ ){ ## Single-end | |
386 (undef,$number_OB) = split /\t/; | |
387 print "Number OB SE >> $number_OB <<\n" if ($verbose); | |
388 } | |
389 | |
390 | |
391 } | |
392 | |
393 if (defined $unique and defined $no_aln and defined $multiple and defined $no_genomic and defined $total_seqs){ | |
394 warn "Got all necessary information, editing HTML report\n" if ($verbose); | |
395 | |
396 ### General Alignment Stats | |
397 $doc =~ s/\{\{unique_seqs\}\}/$unique/g; | |
398 $doc =~ s/\{\{unique_seqs_text\}\}/$unique_text/g; | |
399 | |
400 $doc =~ s/\{\{no_alignments\}\}/$no_aln/g; | |
401 $doc =~ s/\{\{no_alignments_text\}\}/$no_aln_text/g; | |
402 | |
403 $doc =~ s/\{\{multiple_alignments\}\}/$multiple/g; | |
404 $doc =~ s/\{\{multiple_alignments_text\}\}/$multiple_text/g; | |
405 | |
406 $doc =~ s/\{\{no_genomic\}\}/$no_genomic/g; | |
407 | |
408 $doc =~ s/\{\{total_sequences_alignments\}\}/$total_seqs/g; | |
409 $doc =~ s/\{\{sequences_analysed_in_total\}\}/$total_seq_text/g; | |
410 | |
411 $doc =~ s/\{\{filename\}\}/$input_filename/g; | |
412 $doc =~ s/\{\{bismark_version\}\}/$bismark_version/g; | |
413 | |
414 ### Strand Origin | |
415 $doc =~ s/\{\{number_OT\}\}/$number_OT/g; | |
416 $doc =~ s/\{\{number_CTOT\}\}/$number_CTOT/g; | |
417 $doc =~ s/\{\{number_CTOB\}\}/$number_CTOB/g; | |
418 $doc =~ s/\{\{number_OB\}\}/$number_OB/g; | |
419 | |
420 ### Context Methylation | |
421 $doc =~ s/\{\{total_C_count\}\}/$total_C_count/g; | |
422 | |
423 unless (defined $perc_CpG){ | |
424 $perc_CpG = 'N/A'; | |
425 } | |
426 unless (defined $perc_CHG){ | |
427 $perc_CHG = 'N/A'; | |
428 } | |
429 unless (defined $perc_CHH){ | |
430 $perc_CHH = 'N/A'; | |
431 } | |
432 unless (defined $perc_unknown){ | |
433 $perc_unknown = 'N/A'; | |
434 } | |
435 | |
436 ### Unknown sequence context, just for Bowtie 2 alignments | |
437 my $meth_unknown_inject; | |
438 my $unmeth_unknown_inject; | |
439 my $perc_unknown_inject; | |
440 | |
441 if (defined $meth_unknown){ # if one Unknown context file is present, so should the others | |
442 $meth_unknown_inject = " <tr> | |
443 <th>Methylated C's in Unknown context</th> | |
444 <td>$meth_unknown</td> | |
445 </tr>"; | |
446 $unmeth_unknown_inject = " <tr> | |
447 <th>Unmethylated C's in Unknown context</th> | |
448 <td>$unmeth_unknown</td> | |
449 </tr>"; | |
450 $perc_unknown_inject = " <tr> | |
451 <th>Methylated C's in Unknown context</th> | |
452 <td>$perc_unknown%</td> | |
453 </tr>"; | |
454 } | |
455 else{ | |
456 $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = ''; | |
457 } | |
458 | |
459 ### injecting this into the table | |
460 $doc =~ s/\{\{meth_unknown\}\}/$meth_unknown_inject/g; | |
461 $doc =~ s/\{\{unmeth_unknown\}\}/$unmeth_unknown_inject/g; | |
462 $doc =~ s/\{\{perc_unknown\}\}/$perc_unknown_inject/g; | |
463 | |
464 | |
465 $doc =~ s/\{\{meth_CpG\}\}/$meth_CpG/g; | |
466 $doc =~ s/\{\{meth_CHG\}\}/$meth_CHG/g; | |
467 $doc =~ s/\{\{meth_CHH\}\}/$meth_CHH/g; | |
468 | |
469 $doc =~ s/\{\{unmeth_CpG\}\}/$unmeth_CpG/g; | |
470 $doc =~ s/\{\{unmeth_CHG\}\}/$unmeth_CHG/g; | |
471 $doc =~ s/\{\{unmeth_CHH\}\}/$unmeth_CHH/g; | |
472 | |
473 $doc =~ s/\{\{perc_CpG\}\}/$perc_CpG/g; | |
474 $doc =~ s/\{\{perc_CHG\}\}/$perc_CHG/g; | |
475 $doc =~ s/\{\{perc_CHH\}\}/$perc_CHH/g; | |
476 | |
477 my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph); | |
478 | |
479 if ($perc_CpG eq 'N/A'){ | |
480 $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
481 } | |
482 else{ | |
483 $perc_CpG_graph = $perc_CpG; | |
484 } | |
485 | |
486 if ($perc_CHG eq 'N/A'){ | |
487 $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
488 } | |
489 else{ | |
490 $perc_CHG_graph = $perc_CHG; | |
491 } | |
492 | |
493 if ($perc_CHH eq 'N/A'){ | |
494 $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
495 } | |
496 else{ | |
497 $perc_CHH_graph = $perc_CHH; | |
498 } | |
499 | |
500 if ($perc_unknown eq 'N/A'){ | |
501 $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
502 } | |
503 else{ | |
504 $perc_unknown_graph = $perc_unknown; | |
505 } | |
506 | |
507 $doc =~ s/\{\{perc_CpG_graph\}\}/$perc_CpG_graph/g; | |
508 $doc =~ s/\{\{perc_CHG_graph\}\}/$perc_CHG_graph/g; | |
509 $doc =~ s/\{\{perc_CHH_graph\}\}/$perc_CHH_graph/g; | |
510 | |
511 } | |
512 else{ | |
513 warn "Am I missing something?\n\n"; | |
514 } | |
515 | |
516 warn "Complete\n\n"; | |
517 return $doc; | |
518 } | |
519 | |
520 | |
521 sub read_deduplication_report{ | |
522 | |
523 my ($dedup_report,$doc) = @_; | |
524 | |
525 warn "Processing deduplication report $dedup_report ...\n"; | |
526 open (DEDUP,$dedup_report) or die "Couldn't read from file $dedup_report: $!\n\n"; | |
527 | |
528 my $total_seqs; | |
529 my $dups; | |
530 my $diff_pos; | |
531 my $leftover; | |
532 | |
533 while (<DEDUP>){ | |
534 chomp; | |
535 if ($_ =~ /^Total number of alignments/){ | |
536 (undef,$total_seqs) = split /\t/; | |
537 warn "Total number of seqs >> $total_seqs <<\n" if ($verbose); | |
538 } | |
539 elsif($_ =~ /^Total number duplicated/){ | |
540 (undef,$dups) = split /\t/; | |
541 $dups =~ s/\s.*//; # just need the number, not the percentage | |
542 warn "Duplicated >> $dups <<\n" if ($verbose); | |
543 } | |
544 elsif($_ =~ /^Duplicated alignments were found at/){ | |
545 (undef,$diff_pos) = split /\t/; | |
546 $diff_pos =~ s/\s.*//; # just need the number | |
547 warn "Different positions >> $diff_pos <<\n" if ($verbose); | |
548 } | |
549 elsif($_ =~ /^Total count of deduplicated leftover sequences: (\d+)/){ | |
550 $leftover = $1; | |
551 warn "Leftover seqs >> $leftover <<\n" if ($verbose); | |
552 } | |
553 } | |
554 | |
555 unless (defined $leftover){ | |
556 if (defined $dups and defined $total_seqs){ | |
557 $leftover = $total_seqs - $dups; | |
558 } | |
559 } | |
560 | |
561 # Checking if we got all we need | |
562 if (defined $dups and defined $total_seqs and defined $diff_pos and defined $leftover){ | |
563 # warn "Got all I need!\n\n"; | |
564 $doc =~ s/\{\{seqs_total_duplicates\}\}/$total_seqs/g; | |
565 $doc =~ s/\{\{unique_alignments_duplicates\}\}/$leftover/g; | |
566 $doc =~ s/\{\{duplicate_alignments_duplicates\}\}/$dups/g; | |
567 $doc =~ s/\{\{different_positions_duplicates\}\}/$diff_pos/g; | |
568 } | |
569 else{ | |
570 warn "Something went wrong... Use --verbose to get a clue...\n"; | |
571 # skipping this plot entirely if values could not be extracted | |
572 return $doc; | |
573 } | |
574 warn "Complete\n\n"; | |
575 return $doc; | |
576 } | |
577 | |
578 | |
579 sub read_nucleotide_coverage_report{ | |
580 | |
581 my ($nuc_report,$doc) = @_; | |
582 | |
583 warn "Processing nucleotide coverage report '$nuc_report' ...\n"; | |
584 open (NUC,$nuc_report) or die "Couldn't read from file $nuc_report: $!\n\n"; | |
585 | |
586 my %nucs; # storing nucleotides and frequencies | |
587 my $linecount = 0; | |
588 | |
589 while (<NUC>){ | |
590 chomp; | |
591 $_ =~ s/\r//; # removing carriage returns | |
592 # warn "$_\n"; sleep(1); | |
593 my ($element,$count_obs,$observed,$count_exp,$expected,$coverage) = (split /\t/); | |
594 # warn "$element , $count_obs , $observed , $count_exp , $expected, $coverage\n"; sleep(1); | |
595 if ($linecount == 0){ # verifying that the data appears to be a Bismark nucleotide coverage report | |
596 if ($observed eq 'percent sample'){ | |
597 # warn "Fine, found '$observed'\n"; | |
598 } | |
599 else{ | |
600 die "Expected to find 'percent sample' as entry in line 1, column 3 but found '$observed'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n"; | |
601 } | |
602 | |
603 if ($expected eq 'percent genomic'){ | |
604 # warn "Fine, found '$expected'\n"; | |
605 } | |
606 else{ | |
607 die "Expected to find 'percent genomic' as entry in line 1, column 5 but found '$expected'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n"; | |
608 } | |
609 } | |
610 else{ | |
611 $nucs{$element}->{obs}->{percent} = $observed; | |
612 $nucs{$element}->{exp}->{percent} = $expected; | |
613 $nucs{$element}->{obs}->{counts} = $count_obs; | |
614 $nucs{$element}->{exp}->{counts} = $count_exp; | |
615 $nucs{$element}->{obs}->{coverage} = $coverage; # coverage of that nucleotide in the sample | |
616 warn "Element '$element' observed: $observed\n" if $verbose; | |
617 warn "Element '$element' expected: $expected\n" if $verbose; | |
618 } | |
619 | |
620 ++$linecount; | |
621 | |
622 } | |
623 | |
624 # Checking if we got all we need | |
625 my $looksOK = 1; | |
626 foreach my $key (keys %nucs){ | |
627 unless ( (defined $nucs{$key}->{obs}) and (defined $nucs{$key}->{exp})){ | |
628 $looksOK = 0; | |
629 } | |
630 } | |
631 | |
632 if ($looksOK){ | |
633 warn "Got all necessary information, editing HTML report ...\n" if $verbose; | |
634 my $minmax = 0; | |
635 foreach my $key (sort {$a cmp $b} keys %nucs){ | |
636 my $nuc_obs = $nucs{$key}->{obs}->{percent}; | |
637 my $nuc_exp = $nucs{$key}->{exp}->{percent}; | |
638 my $counts_obs = $nucs{$key}->{obs}->{counts}; | |
639 my $counts_exp = $nucs{$key}->{exp}->{counts}; | |
640 my $cov = $nucs{$key}->{obs}->{coverage}; | |
641 | |
642 # calculating log2 observed/expected | |
643 my $ratio = $nuc_obs/$nuc_exp; | |
644 # my $logratio = sprintf ("%.2f",log($ratio)/log(2)); | |
645 # if (abs($logratio) > $minmax){ | |
646 # $minmax = abs($logratio); | |
647 # } | |
648 warn "$key\tnuc_${key}_obs\t$nuc_obs\tnuc_${key}_exp\t$nuc_exp\tratio: $ratio\n" if $verbose; | |
649 | |
650 $doc =~ s/\{\{nuc_${key}_p_obs\}\}/$nuc_obs/g; | |
651 $doc =~ s/\{\{nuc_${key}_p_exp\}\}/$nuc_exp/g; | |
652 $doc =~ s/\{\{nuc_${key}_counts_obs\}\}/$counts_obs/g; | |
653 $doc =~ s/\{\{nuc_${key}_counts_exp\}\}/$counts_exp/g; | |
654 $doc =~ s/\{\{nuc_${key}_coverage\}\}/$cov/g; | |
655 } | |
656 # warn "Minimum/maxium ratio was: $minmax\n" if $verbose; | |
657 # $doc =~ s/\{\{nuc_minmax\}\}/$minmax/g; | |
658 } | |
659 else{ | |
660 warn "Something went wrong, skipping this plot entirely... Use --verbose to get a clue...\n"; | |
661 # skipping this plot entirely if values could not be extracted | |
662 return $doc; | |
663 } | |
664 | |
665 warn "Complete\n\n"; | |
666 return $doc; | |
667 } | |
668 | |
669 | |
670 sub read_splitting_report{ | |
671 | |
672 my ($splitting_report,$doc) = @_; | |
673 | |
674 warn "Processing splitting report $splitting_report ...\n"; | |
675 open (SPLIT,$splitting_report) or die "Couldn't read from file $splitting_report: $!\n\n"; | |
676 | |
677 my $total_seqs; | |
678 | |
679 my $total_C_count; | |
680 my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown); | |
681 my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown); | |
682 my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown); | |
683 | |
684 while (<SPLIT>){ | |
685 chomp; | |
686 | |
687 ### Context Methylation | |
688 if($_ =~ /^Total number of C/ ){ | |
689 (undef,$total_C_count) = split /\t/; | |
690 print "total calls >> $total_C_count <<\n" if ($verbose); | |
691 } | |
692 | |
693 elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){ | |
694 (undef,$meth_CpG) = split /\t/; | |
695 print "meth CpG >> $meth_CpG <<\n" if ($verbose); | |
696 } | |
697 elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){ | |
698 (undef,$meth_CHG) = split /\t/; | |
699 print "meth CHG>> $meth_CHG <<\n" if ($verbose); | |
700 } | |
701 elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){ | |
702 (undef,$meth_CHH) = split /\t/; | |
703 print "meth CHH >> $meth_CHH <<\n" if ($verbose); | |
704 } | |
705 elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){ | |
706 (undef,$meth_unknown) = split /\t/; | |
707 print "meth Unknown >> $meth_unknown <<\n" if ($verbose); | |
708 } | |
709 | |
710 elsif($_ =~ /^Total C to T conversions in CpG context:/ ){ | |
711 (undef,$unmeth_CpG) = split /\t/; | |
712 print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose); | |
713 } | |
714 elsif($_ =~ /^Total C to T conversions in CHG context:/ ){ | |
715 (undef,$unmeth_CHG) = split /\t/; | |
716 print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose); | |
717 } | |
718 elsif($_ =~ /^Total C to T conversions in CHH context:/ ){ | |
719 (undef,$unmeth_CHH) = split /\t/; | |
720 print "unmeth CHH >> $unmeth_CHH <<\n" if ($verbose); | |
721 } | |
722 elsif($_ =~ /^Total C to T conversions in Unknown context:/ ){ | |
723 (undef,$unmeth_unknown) = split /\t/; | |
724 print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose); | |
725 } | |
726 | |
727 elsif($_ =~ /^C methylated in CpG context:/ ){ | |
728 (undef,$perc_CpG) = split /\t/; | |
729 $perc_CpG =~ s/%//; | |
730 print "percentage CpG >> $perc_CpG <<\n" if ($verbose); | |
731 } | |
732 elsif($_ =~ /^C methylated in CHG context:/ ){ | |
733 (undef,$perc_CHG) = split /\t/; | |
734 $perc_CHG =~ s/%//; | |
735 print "percentage CHG >> $perc_CHG <<\n" if ($verbose); | |
736 } | |
737 elsif($_ =~ /^C methylated in CHH context:/ ){ | |
738 (undef,$perc_CHH) = split /\t/; | |
739 $perc_CHH =~ s/%//; | |
740 print "percentage CHH >> $perc_CHH <<\n" if ($verbose); | |
741 } | |
742 elsif($_ =~ /^C methylated in Unknown context:/ ){ | |
743 (undef,$perc_unknown) = split /\t/; | |
744 $perc_unknown =~ s/%//; | |
745 print "percentage unknown >> $perc_unknown <<\n" if ($verbose); | |
746 } | |
747 } | |
748 | |
749 if (defined $meth_CpG and defined $meth_CHG and defined $meth_CHH and defined $unmeth_CpG and defined $unmeth_CHG and defined $unmeth_CHH){ | |
750 warn "Got all necessary information, editing HTML report ...\n" if ($verbose); | |
751 | |
752 ### Context Methylation | |
753 $doc =~ s/\{\{total_C_count_splitting\}\}/$total_C_count/g; | |
754 | |
755 $doc =~ s/\{\{meth_CpG_splitting\}\}/$meth_CpG/g; | |
756 $doc =~ s/\{\{meth_CHG_splitting\}\}/$meth_CHG/g; | |
757 $doc =~ s/\{\{meth_CHH_splitting\}\}/$meth_CHH/g; | |
758 | |
759 $doc =~ s/\{\{unmeth_CpG_splitting\}\}/$unmeth_CpG/g; | |
760 $doc =~ s/\{\{unmeth_CHG_splitting\}\}/$unmeth_CHG/g; | |
761 $doc =~ s/\{\{unmeth_CHH_splitting\}\}/$unmeth_CHH/g; | |
762 | |
763 unless (defined $perc_CpG){ | |
764 $perc_CpG = 'N/A'; | |
765 } | |
766 unless (defined $perc_CHG){ | |
767 $perc_CHG = 'N/A'; | |
768 } | |
769 unless (defined $perc_CHH){ | |
770 $perc_CHH = 'N/A'; | |
771 } | |
772 unless (defined $perc_unknown){ | |
773 $perc_unknown = 'N/A'; | |
774 } | |
775 | |
776 ### Unknown sequence context, just for Bowtie 2 alignments | |
777 my $meth_unknown_inject; | |
778 my $unmeth_unknown_inject; | |
779 my $perc_unknown_inject; | |
780 | |
781 if (defined $meth_unknown){ # if one Unknown context file is present, so should the others | |
782 $meth_unknown_inject = " <tr> | |
783 <th>Methylated C's in Unknown context</th> | |
784 <td>$meth_unknown</td> | |
785 </tr>"; | |
786 $unmeth_unknown_inject = " <tr> | |
787 <th>Unmethylated C's in Unknown context</th> | |
788 <td>$unmeth_unknown</td> | |
789 </tr>"; | |
790 $perc_unknown_inject = " <tr> | |
791 <th>Methylated C's in Unknown context</th> | |
792 <td>$perc_unknown%</td> | |
793 </tr>"; | |
794 } | |
795 else{ | |
796 $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = ''; | |
797 } | |
798 | |
799 ### injecting this into the table | |
800 $doc =~ s/\{\{meth_unknown_splitting\}\}/$meth_unknown_inject/g; | |
801 $doc =~ s/\{\{unmeth_unknown_splitting\}\}/$unmeth_unknown_inject/g; | |
802 $doc =~ s/\{\{perc_unknown_splitting\}\}/$perc_unknown_inject/g; | |
803 | |
804 # for the graph we need to take care that there are no N/A values in the percentage fields | |
805 my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph); | |
806 | |
807 if ($perc_CpG eq 'N/A'){ | |
808 $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
809 } | |
810 else{ | |
811 $perc_CpG_graph = $perc_CpG; | |
812 } | |
813 | |
814 if ($perc_CHG eq 'N/A'){ | |
815 $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
816 } | |
817 else{ | |
818 $perc_CHG_graph = $perc_CHG; | |
819 } | |
820 | |
821 if ($perc_CHH eq 'N/A'){ | |
822 $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
823 } | |
824 else{ | |
825 $perc_CHH_graph = $perc_CHH; | |
826 } | |
827 | |
828 if ($perc_unknown eq 'N/A'){ | |
829 $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors | |
830 } | |
831 else{ | |
832 $perc_unknown_graph = $perc_unknown; | |
833 } | |
834 | |
835 $doc =~ s/\{\{perc_CpG_graph_splitting\}\}/$perc_CpG_graph/g; | |
836 $doc =~ s/\{\{perc_CHG_graph_splitting\}\}/$perc_CHG_graph/g; | |
837 $doc =~ s/\{\{perc_CHH_graph_splitting\}\}/$perc_CHH_graph/g; | |
838 | |
839 $doc =~ s/\{\{perc_CpG_splitting\}\}/$perc_CpG/g; | |
840 $doc =~ s/\{\{perc_CHG_splitting\}\}/$perc_CHG/g; | |
841 $doc =~ s/\{\{perc_CHH_splitting\}\}/$perc_CHH/g; | |
842 } | |
843 else{ | |
844 warn "Am I missing something? Try using --verbose to get a clue...\n\n"; | |
845 } | |
846 warn "Complete\n\n"; | |
847 | |
848 return $doc; | |
849 | |
850 } | |
851 | |
852 | |
853 sub read_mbias_report{ | |
854 | |
855 my ($mbias_report,$doc) = @_; | |
856 | |
857 warn "Processing M-bias report $mbias_report ...\n"; | |
858 open (MBIAS,$mbias_report) or die "Couldn't read from file $mbias_report: $!\n\n"; | |
859 | |
860 my %mbias_1; | |
861 my %mbias_2; | |
862 | |
863 my $context; | |
864 my $read_identity; | |
865 my $state = 'single'; # setting this to 'single' if there is no read 2 | |
866 | |
867 while (<MBIAS>){ | |
868 chomp; | |
869 if ($_ =~ /^(C.{2}) context/){ | |
870 $context = $1; | |
871 | |
872 if ($_ =~ /R2/){ | |
873 $read_identity = 2; | |
874 $state = 'paired'; | |
875 } | |
876 else{ | |
877 $read_identity = 1; | |
878 } | |
879 | |
880 # warn "new context is: $context\n"; | |
881 # warn "Read identity is: Read $read_identity\n"; | |
882 } | |
883 if ($_ =~ /^\d/){ | |
884 my ($pos,$meth,$unmeth,$perc,$coverage) = (split /\t/); | |
885 if ($read_identity == 1){ | |
886 push @{$mbias_1{$context}->{coverage}}, "[$pos, $coverage]"; | |
887 push @{$mbias_1{$context}->{perc}}, "[$pos, $perc]"; | |
888 } | |
889 elsif ($read_identity == 2){ | |
890 push @{$mbias_2{$context}->{coverage}}, "[$pos, $coverage]"; | |
891 push @{$mbias_2{$context}->{perc}}, "[$pos, $perc]"; | |
892 } | |
893 else{ | |
894 warn "read identity was unknown : '$read_identity'\n\n"; | |
895 } | |
896 | |
897 # print join (" ",$pos,$meth,$unmeth,$perc,$coverage)."\n"; | |
898 } | |
899 } | |
900 | |
901 # Read 1 M-bias | |
902 my $r1_CpG_coverage = join (',',@{$mbias_1{'CpG'}->{coverage}}); | |
903 my $r1_CpG_perc = join (',',@{$mbias_1{'CpG'}->{perc}}); | |
904 | |
905 my $r1_CHG_coverage = join (',',@{$mbias_1{'CHG'}->{coverage}}); | |
906 my $r1_CHG_perc = join (',',@{$mbias_1{'CHG'}->{perc}}); | |
907 | |
908 my $r1_CHH_coverage = join (',',@{$mbias_1{'CHH'}->{coverage}}); | |
909 my $r1_CHH_perc = join (',',@{$mbias_1{'CHH'}->{perc}}); | |
910 | |
911 $doc =~ s/\{\{CpG_total_calls_R1\}\}/$r1_CpG_coverage/g; | |
912 $doc =~ s/\{\{CHG_total_calls_R1\}\}/$r1_CHG_coverage/g; | |
913 $doc =~ s/\{\{CHH_total_calls_R1\}\}/$r1_CHH_coverage/g; | |
914 | |
915 $doc =~ s/\{\{CpG_methylation_R1\}\}/$r1_CpG_perc/g; | |
916 $doc =~ s/\{\{CHG_methylation_R1\}\}/$r1_CHG_perc/g; | |
917 $doc =~ s/\{\{CHH_methylation_R1\}\}/$r1_CHH_perc/g; | |
918 | |
919 # Read 2 M-bias | |
920 if (%mbias_2){ | |
921 my $r2_CpG_coverage = join (',',@{$mbias_2{'CpG'}->{coverage}}); | |
922 my $r2_CpG_perc = join (',',@{$mbias_2{'CpG'}->{perc}}); | |
923 | |
924 my $r2_CHG_coverage = join (',',@{$mbias_2{'CHG'}->{coverage}}); | |
925 my $r2_CHG_perc = join (',',@{$mbias_2{'CHG'}->{perc}}); | |
926 | |
927 my $r2_CHH_coverage = join (',',@{$mbias_2{'CHH'}->{coverage}}); | |
928 my $r2_CHH_perc = join (',',@{$mbias_2{'CHH'}->{perc}}); | |
929 | |
930 $doc =~ s/\{\{CpG_total_calls_R2\}\}/$r2_CpG_coverage/g; | |
931 $doc =~ s/\{\{CHG_total_calls_R2\}\}/$r2_CHG_coverage/g; | |
932 $doc =~ s/\{\{CHH_total_calls_R2\}\}/$r2_CHH_coverage/g; | |
933 | |
934 $doc =~ s/\{\{CpG_methylation_R2\}\}/$r2_CpG_perc/g; | |
935 $doc =~ s/\{\{CHG_methylation_R2\}\}/$r2_CHG_perc/g; | |
936 $doc =~ s/\{\{CHH_methylation_R2\}\}/$r2_CHH_perc/g; | |
937 } | |
938 warn "Complete\n\n"; | |
939 | |
940 return ($state,$doc); | |
941 | |
942 } | |
943 | |
944 | |
945 sub read_report_template{ | |
946 my $doc; | |
947 warn "Attempting to open file from: $Bin/bismark_sitrep.tpl\n\n" if ($verbose); | |
948 open (DOC,"$Bin/bismark_sitrep.tpl") or die $!; | |
949 while(<DOC>){ | |
950 chomp; | |
951 $_ =~ s/\r//g; | |
952 $doc .= $_."\n"; | |
953 } | |
954 | |
955 close DOC or die $!; | |
956 return $doc; | |
957 } | |
958 | |
959 | |
960 | |
961 sub process_commandline{ | |
962 my $help; | |
963 my $output_dir; | |
964 my $manual_output_file; | |
965 my $alignment_report; | |
966 my $dedup_report; | |
967 my $splitting_report; | |
968 my $mbias_report; | |
969 my $nucleotide_coverage_report; # stores nucleotide coverage statistics | |
970 my $verbose; | |
971 | |
972 my $version; | |
973 | |
974 my $command_line = GetOptions ('help|man' => \$help, | |
975 'dir=s' => \$output_dir, | |
976 'o|output=s' => \$manual_output_file, | |
977 'alignment_report=s' => \$alignment_report, | |
978 'dedup_report=s' => \$dedup_report, | |
979 'splitting_report=s' => \$splitting_report, | |
980 'mbias_report=s' => \$mbias_report, | |
981 'nucleotide_report=s' => \$nucleotide_coverage_report, | |
982 'version' => \$version, | |
983 'verbose' => \$verbose, | |
984 ); | |
985 | |
986 ### EXIT ON ERROR if there were errors with any of the supplied options | |
987 unless ($command_line){ | |
988 die "Please respecify command line options\n"; | |
989 } | |
990 | |
991 ### HELPFILE | |
992 if ($help){ | |
993 print_helpfile(); | |
994 exit; | |
995 } | |
996 | |
997 if ($version){ | |
998 print << "VERSION"; | |
999 | |
1000 | |
1001 Bismark HTML Report Module | |
1002 | |
1003 bismark2report version: $bismark2report_version | |
1004 Copyright 2010-16 Felix Krueger | |
1005 Babraham Bioinformatics | |
1006 www.bioinformatics.babraham.ac.uk/projects/bismark/ | |
1007 | |
1008 | |
1009 VERSION | |
1010 exit; | |
1011 } | |
1012 | |
1013 ### OUTPUT DIR PATH | |
1014 if (defined $output_dir){ | |
1015 unless ($output_dir eq ''){ # if the output dir has been passed on by the methylation extractor and is an empty string we don't want to change it | |
1016 unless ($output_dir =~ /\/$/){ | |
1017 $output_dir =~ s/$/\//; | |
1018 } | |
1019 } | |
1020 } | |
1021 else{ | |
1022 $output_dir = ''; | |
1023 } | |
1024 | |
1025 | |
1026 ## First we are looking for alignment reports, and then look whether there are any optional plots with the same base name | |
1027 | |
1028 if ($alignment_report){ | |
1029 ### we only process the one alignment report (and possibly the other ones as well) that was specified | |
1030 push @alignment_reports, $alignment_report; | |
1031 } | |
1032 else{ ### no alignment report specified, looking in the current directory for files ending in *E_report.txt (SE or PE) | |
1033 | |
1034 ### looking in the current directory for report files. Less than 1 report file is not allowed | |
1035 @alignment_reports = <*E_report.txt>; | |
1036 | |
1037 if (scalar @alignment_reports == 0){ | |
1038 warn "Found no potential alignment reports in the current directory. Please specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n"; | |
1039 print_helpfile(); | |
1040 exit; | |
1041 } | |
1042 else{ | |
1043 # there are Bismark alignment report(s) in the directory | |
1044 warn "Found ",scalar @alignment_reports," alignment reports in current directory. Now trying to figure out whether there are corresponding optional reports\n"; | |
1045 } | |
1046 } | |
1047 | |
1048 ### Ensuring that there isn't more than 1 file in @alignment_reports if someone manually specified an output file. | |
1049 if (scalar @alignment_reports > 1){ | |
1050 if (defined $manual_output_file){ | |
1051 die "You cannot run bismark2report on more than 1 file while specifying a single output file. Either lose the option -o to derive the output filenames automatically, or specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n"; | |
1052 } | |
1053 } | |
1054 | |
1055 foreach my $aln (@alignment_reports){ | |
1056 | |
1057 # warn "Figuring things out for:\n$aln\n"; | |
1058 $aln =~ /^(.+)_(P|S)E_report.txt$/; | |
1059 my $basename = $1; | |
1060 # warn "using this basename:\n$basename\n\n"; | |
1061 | |
1062 ### DEDUPLICATION REPORT (optional) | |
1063 if ($dedup_report){ | |
1064 warn "User specifified dedup report: $dedup_report\n";sleep(3); | |
1065 if (lc$dedup_report eq 'none'){ | |
1066 push @dedup_reports, ''; # user may wish to skip this step by specifying 'none' | |
1067 } | |
1068 else{ | |
1069 push @dedup_reports, $dedup_report; | |
1070 } | |
1071 } | |
1072 else{ | |
1073 ### looking in the current directory for a report file with the same base name | |
1074 my @dedup_report_files = <$basename*deduplication_report.txt>; | |
1075 # warn "Number of deduplication reports found in current directory for basename $1: ", scalar @dedup_report_files , "\n"; | |
1076 | |
1077 if (scalar @dedup_report_files > 1){ | |
1078 die "Found ", scalar @dedup_report_files ," potential deduplication reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--dedup_report FILE' or otherwise provide filenames that are easier to figure out...\n\n"; | |
1079 } | |
1080 elsif (scalar @dedup_report_files == 0){ | |
1081 push @dedup_reports, ''; # no corresponding deduplication report found | |
1082 } | |
1083 else{ | |
1084 # there is only a single deduplication report in the current directory, using this one | |
1085 $dedup_report = shift @dedup_report_files; | |
1086 push @dedup_reports, $dedup_report; | |
1087 # warn "Going to use this dedup report: $dedup_report\n\n"; | |
1088 } | |
1089 } | |
1090 | |
1091 ### NUCLEOTIDE COVERAGE REPORT (optional) | |
1092 if (defined $nucleotide_coverage_report){ | |
1093 # warn "User specified nucleotide coverage report: $nucleotide_coverage_report\n";sleep(1); | |
1094 if (lc$nucleotide_coverage_report eq 'none'){ | |
1095 push @nuc_reports, ''; # user may wish to skip this step by specifying 'none' | |
1096 } | |
1097 else{ | |
1098 push @nuc_reports, $nucleotide_coverage_report; | |
1099 } | |
1100 } | |
1101 else{ | |
1102 ### looking in the current directory for a report file with the same base name | |
1103 my @nucleotide_coverage_report_files = <$basename*nucleotide_stats.txt>; | |
1104 # warn "Number of nucleotide coverage statistic reports found in current directory for basename $1: ", scalar @nucleotide_coverage_report_files , "\n"; | |
1105 | |
1106 if (scalar @nucleotide_coverage_report_files > 1){ | |
1107 die "Found ", scalar @nucleotide_coverage_report_files ," potential nucleotide coverage reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--nucleotide_report FILE' or otherwise provide filenames that are easier to figure out...\n\n"; | |
1108 } | |
1109 elsif (scalar @nucleotide_coverage_report_files == 0){ | |
1110 # warn "Found no corresponding nucleotide coverage file\n"; | |
1111 push @nuc_reports, ''; # no corresponding nucleotide coverge file report found | |
1112 } | |
1113 else{ | |
1114 # there is only a single nucleotide coverage report in the current directory, using this one | |
1115 $nucleotide_coverage_report = shift @nucleotide_coverage_report_files; | |
1116 push @nuc_reports, $nucleotide_coverage_report; | |
1117 # warn "Going to use this nucleotide coverage report: $nucleotide_coverage_report\n\n"; sleep(3); | |
1118 } | |
1119 } | |
1120 | |
1121 | |
1122 ### METHYLATION EXTRACTOR SPLITTING REPORT | |
1123 if ($splitting_report){ | |
1124 if (lc$splitting_report eq 'none'){ | |
1125 push @splitting_reports, ''; # user may wish to skip this step by specifying 'none' | |
1126 } | |
1127 else{ | |
1128 push @splitting_reports, $splitting_report; | |
1129 } | |
1130 } | |
1131 else{ | |
1132 ### looking in the current directory for a report file with the same basename | |
1133 my @splitting_report_files = <$basename*splitting_report.txt>; | |
1134 # warn "Number of splitting reports found in current directory: ", scalar @splitting_report_files , "\n"; | |
1135 | |
1136 if (scalar @splitting_report_files > 1){ | |
1137 die "Found ", scalar @splitting_report_files ," potential methylation extractor splitting reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--splitting_report FILE' or otherwise provide filenames that are easier to figure out...\n\n"; | |
1138 } | |
1139 elsif (scalar @splitting_report_files == 0){ | |
1140 push @splitting_reports, ''; # no corresponding methylation extractor report found | |
1141 } | |
1142 else{ | |
1143 # there is only a single splitting report in the current directory, using this one | |
1144 $splitting_report = shift @splitting_report_files; | |
1145 push @splitting_reports, $splitting_report; | |
1146 } | |
1147 } | |
1148 | |
1149 ### M-BIAS REPORT | |
1150 if ($mbias_report){ | |
1151 if (lc$mbias_report eq 'none'){ | |
1152 $mbias_report = ''; # user may wish to skip this step by specifying 'none' | |
1153 push @mbias_reports, $mbias_report; | |
1154 } | |
1155 else{ | |
1156 push @mbias_reports, $mbias_report; | |
1157 } | |
1158 } | |
1159 else{ | |
1160 ### looking in the current directory for a single report file. More (or less) than 1 report file is not allowed | |
1161 my @mbias_report_files = <$basename*M-bias.txt>; | |
1162 | |
1163 # warn "Number of M-bias reports found in current directory: ", scalar @mbias_report_files , "\n"; | |
1164 | |
1165 if (scalar @mbias_report_files > 1){ | |
1166 die "Found ", scalar @mbias_report_files ," potential M-bias reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--mbias_report FILE'or otherwise provide filenames that are easier to figure out automatically ...\n\n"; | |
1167 } | |
1168 elsif (scalar @mbias_report_files == 0){ | |
1169 push @mbias_reports, ''; | |
1170 } | |
1171 else{ | |
1172 # there is only a single M-bias report in the current directory, using this one | |
1173 $mbias_report = shift @mbias_report_files; | |
1174 push @mbias_reports, $mbias_report; | |
1175 } | |
1176 } | |
1177 $dedup_report = $splitting_report = $mbias_report = $nucleotide_coverage_report = undef; | |
1178 } | |
1179 | |
1180 return ($output_dir,$verbose,$manual_output_file); | |
1181 | |
1182 } | |
1183 | |
1184 sub print_helpfile{ | |
1185 print <<EOF | |
1186 | |
1187 SYNOPSIS: | |
1188 | |
1189 This script uses a Bismark alignment report to generate a graphical HTML report page. Optionally, further reports of | |
1190 the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. If several | |
1191 Bismark reports are found in the same folder, a separate report will be generated for each of these, whereby the output filename | |
1192 will be derived from the Bismark alignment report file. Bismark2report attempts to find optional reports automatically based | |
1193 on the file basename. | |
1194 | |
1195 | |
1196 USAGE: bismark2report [options] | |
1197 | |
1198 | |
1199 -o/--output <filename> Name of the output file (optional). If not specified explicitly, the output filename will be derived | |
1200 from the Bismark alignment report file. Specifying an output filename only works if the HTML report is | |
1201 to be generated for a single Bismark alignment report (and potentially additional reports). | |
1202 | |
1203 --dir Output directory. Output is written to the current directory if not specified explicitly. | |
1204 | |
1205 | |
1206 --alignment_report FILE If not specified explicitly, bismark2report attempts to find Bismark report file(s) in the current | |
1207 directory and produces a separate HTML report for each mapping report file. Based on the basename of | |
1208 the Bismark mapping report, bismark2report will also attempt to find the other Bismark reports (see below) | |
1209 for inclusion into the HTML report. Specifying a Bismark alignment report file is mandatory. | |
1210 | |
1211 --dedup_report FILE If not specified explicitly, bismark2report attempts to find a deduplication report file with the same | |
1212 basename as the Bismark mapping report (generated by deduplicate_bismark) in the | |
1213 current working directory. Including a deduplication report is optional, and using the FILE 'none' | |
1214 will skip this step entirely. | |
1215 | |
1216 --splitting_report FILE If not specified explicitly, bismark2report attempts to find a splitting report file with the same | |
1217 basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current | |
1218 working directory. Including a splitting report is optional, and using the FILE 'none' will skip this | |
1219 step entirely. | |
1220 | |
1221 --mbias_report FILE If not specified explicitly, bismark2report attempts to find a single M-bias report file with the same | |
1222 basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current | |
1223 working directory. Including an M-Bias report is optional, and using the FILE 'none' will skip this step | |
1224 entirely. | |
1225 | |
1226 --nucleotide_report FILE If not specified explicitly, bismark2report attempts to find a single nucleotide coverage report file | |
1227 with the same basename as the Bismark mapping report (generated by Bismark with the option | |
1228 '--nucleotide_coverage') in the current working directory. Including a nucleotide coverage statistics | |
1229 report is optional, and using the FILE 'none' will skip this report entirely. | |
1230 | |
1231 Script last modified: 13 May 2016 | |
1232 | |
1233 EOF | |
1234 ; | |
1235 exit 1; | |
1236 } | |
1237 |