3
|
1 #!/usr/bin/perl
|
|
2
|
|
3 #######
|
|
4 # POD #
|
|
5 #######
|
|
6
|
|
7 =pod
|
|
8
|
|
9 =head1 NAME
|
|
10
|
|
11 C<calc_fastq-stats.pl> - basic statistics for bases and reads in a FASTQ file
|
|
12
|
|
13 =head1 SYNOPSIS
|
|
14
|
|
15 C<perl calc_fastq-stats.pl -i reads.fastq>
|
|
16
|
|
17 B<or>
|
|
18
|
|
19 C<gzip -dc reads.fastq.gz | perl calc_fastq-stats.pl -i ->
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 Calculates some simple statistics, like individual and total base
|
|
24 counts, GC content, and basic stats for the read lengths, and
|
|
25 read/base qualities in a FASTQ file. The GC content calculation does
|
|
26 not include 'N's. Stats are printed to C<STDOUT> and optionally to an
|
|
27 output file.
|
|
28
|
|
29 Because the quality of a read degrades over its length with all NGS
|
|
30 machines, it is advisable to also plot the quality for each cycle as
|
|
31 implemented in tools like
|
|
32 L<FastQC|http://www.bioinformatics.babraham.ac.uk/projects/fastqc/>
|
|
33 or the L<fastx-toolkit|http://hannonlab.cshl.edu/fastx_toolkit/>.
|
|
34
|
|
35 If the sequence and the quality values are interrupted by line
|
|
36 breaks (i.e. a read is B<not> represented by four lines), please fix
|
|
37 with Heng Li's L<seqtk|https://github.com/lh3/seqtk>:
|
|
38
|
|
39 C<seqtk seq -l 0 infile.fastq E<gt> outfile.fastq>
|
|
40
|
|
41 An alternative tool, which is a lot faster, is B<fastq-stats> from
|
|
42 L<ea-utils|https://code.google.com/p/ea-utils/>.
|
|
43
|
|
44 =head1 OPTIONS
|
|
45
|
|
46 =head2 Mandatory options
|
|
47
|
|
48 =over 20
|
|
49
|
|
50 =item B<-i>=I<str>, B<-input>=I<str>
|
|
51
|
|
52 Input FASTQ file or piped STDIN (-) from a gzipped file
|
|
53
|
|
54 =item B<-q>=I<int>, B<-qual_offset>=I<int>
|
|
55
|
|
56 ASCII quality offset of the Phred (Sanger) quality values [default 33]
|
|
57
|
|
58 =back
|
|
59
|
|
60 =head2 Optional options
|
|
61
|
|
62 =over 20
|
|
63
|
|
64 =item B<-h>, B<-help>
|
|
65
|
|
66 Help (perldoc POD)
|
|
67
|
|
68 =item B<-c>=I<int>, B<-coverage_limit>=I<int>
|
|
69
|
|
70 Number of bases to sample from the top of the file
|
|
71
|
|
72 =item B<-n>=I<int>, B<-num_read>=I<int>
|
|
73
|
|
74 Number of reads to sample from the top of the file
|
|
75
|
|
76 =item B<-o>=I<str>, B<-output>=I<str>
|
|
77
|
|
78 Print stats in addition to C<STDOUT> to the specified output file
|
|
79
|
|
80 =item B<-v>, B<-version>
|
|
81
|
|
82 Print version number to C<STDERR>
|
|
83
|
|
84 =back
|
|
85
|
|
86 =head1 OUTPUT
|
|
87
|
|
88 =over 20
|
|
89
|
|
90 =item C<STDOUT>
|
|
91
|
|
92 Calculated stats are printed to C<STDOUT>
|
|
93
|
|
94 =item F<(outfile)>
|
|
95
|
|
96 Optional outfile for stats
|
|
97
|
|
98 =back
|
|
99
|
|
100 =head1 EXAMPLES
|
|
101
|
|
102 =over
|
|
103
|
|
104 =item C<zcat reads.fastq.gz | perl calc_fastq-stats.pl -i - -q 64 -c 175000000 -n 3000000>
|
|
105
|
|
106 =back
|
|
107
|
|
108 =head1 DEPENDENCIES
|
|
109
|
|
110 If the following modules are not installed get them from
|
|
111 L<CPAN|http://www.cpan.org/>:
|
|
112
|
|
113 =over 20
|
|
114
|
|
115 =item C<Statistics::Descriptive>
|
|
116
|
|
117 Perl module to calculate basic descriptive statistics
|
|
118
|
|
119 =item C<Statistics::Descriptive::Discrete>
|
|
120
|
|
121 Perl module to calculate descriptive statistics for discrete data sets
|
|
122
|
|
123 =item C<Statistics::Descriptive::Weighted>
|
|
124
|
|
125 Perl module to calculate descriptive statistics for weighted variates
|
|
126
|
|
127 =back
|
|
128
|
|
129 =head1 VERSION
|
|
130
|
|
131 0.1 28-10-2014
|
|
132
|
|
133 =head1 AUTHOR
|
|
134
|
|
135 Andreas Leimbach aleimba[at]gmx[dot]de
|
|
136
|
|
137 =head1 LICENSE
|
|
138
|
|
139 This program is free software: you can redistribute it and/or modify
|
|
140 it under the terms of the GNU General Public License as published by
|
|
141 the Free Software Foundation; either version 3 (GPLv3) of the License,
|
|
142 or (at your option) any later version.
|
|
143
|
|
144 This program is distributed in the hope that it will be useful, but
|
|
145 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
146 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
147 General Public License for more details.
|
|
148
|
|
149 You should have received a copy of the GNU General Public License
|
|
150 along with this program. If not, see L<http://www.gnu.org/licenses/>.
|
|
151
|
|
152 =cut
|
|
153
|
|
154
|
|
155 ########
|
|
156 # MAIN #
|
|
157 ########
|
|
158
|
|
159 use strict;
|
|
160 use warnings;
|
|
161 use autodie;
|
|
162 use Getopt::Long;
|
|
163 use Pod::Usage;
|
|
164 eval { # check if modules are installed
|
|
165 require Statistics::Descriptive; # module of basic descriptive statistics
|
|
166 Statistics::Descriptive->import();
|
|
167 require Statistics::Descriptive::Discrete; # module for statistics with 'discrete' values
|
|
168 Statistics::Descriptive::Discrete->import();
|
|
169 require Statistics::Descriptive::Weighted; # module for weighted statistics (includes approximations for quantiles in contrast to 'Statistics::Descriptive::Discrete')
|
|
170 Statistics::Descriptive::Weighted->import();
|
|
171 }; # semi-colon needed
|
|
172 if ($@) { # if module(s) not installed die with error message
|
|
173 die "\n### Fatal error: One or several of the required statistical Perl modules 'Statistics::Descriptive', 'Statistics::Descriptive::Weighted', or 'Statistics::Descriptive::Discrete' are not installed! Please install from CPAN!\n\n";
|
|
174 }
|
|
175
|
|
176 ### Get the options with Getopt::Long
|
|
177 my $Input_File; # either input FASTQ file or STDIN
|
|
178 my $Output_File; # stats are printed to STDOUT, optionally to this output file
|
|
179 my $Qual_Ascii_Offset = 33; # ASCII quality offset for Phred (Sanger) quality scores (see below)
|
|
180 my $Coverage_Limit; # number of bases to sample
|
|
181 my $Num_Read; # number of reads to sample
|
|
182 my $VERSION = 0.1;
|
|
183 my ($Opt_Version, $Opt_Help);
|
|
184 GetOptions ('input=s' => \$Input_File,
|
|
185 'output:s' => \$Output_File,
|
|
186 'qual_offset=i' => \$Qual_Ascii_Offset,
|
|
187 'coverage_limit:i' => \$Coverage_Limit,
|
|
188 'num_read:i' => \$Num_Read,
|
|
189 'version' => \$Opt_Version,
|
|
190 'help|?' => \$Opt_Help);
|
|
191
|
|
192
|
|
193
|
|
194 ### Run perldoc on POD
|
|
195 pod2usage(-verbose => 2) if ($Opt_Help);
|
|
196 die "$0 $VERSION\n" if ($Opt_Version);
|
|
197 if (!$Input_File) {
|
|
198 my $warning = "\n### Fatal error: Option '-i' or its argument is missing!\n";
|
|
199 pod2usage(-verbose => 1, -message => $warning, -exitval => 2);
|
|
200 }
|
|
201
|
|
202
|
|
203
|
|
204 ### Open FASTQ file or accept STDIN, open optional output file
|
|
205 my $Input_Fh;
|
|
206 if ($Input_File eq '-') { # file input via STDIN
|
|
207 $Input_Fh = *STDIN; # capture typeglob of STDIN
|
|
208 } else { # input via FASTQ file
|
|
209 open ($Input_Fh, "<", "$Input_File");
|
|
210 }
|
|
211 my $Output_Fh;
|
|
212 if ($Output_File) {
|
|
213 file_exist($Output_File); # subroutine
|
|
214 open ($Output_Fh, ">", "$Output_File");
|
|
215 }
|
|
216
|
|
217
|
|
218
|
|
219 ### Parse read data in FASTQ file
|
|
220 my ($A, $C, $G, $T, $U, $N);
|
|
221 $A = $C = $G = $T = $U = $N = 0;
|
|
222 my $Total_Bases = 0; # total number of bases
|
|
223
|
|
224 my $Line_Count = 0; # number of lines in the FASTQ file
|
|
225 my $Read_Count = 0; # number of reads
|
|
226 my @Read_Lengths; # length of each individual read
|
|
227
|
|
228 my %Base_Quality_Counts; # quality counts for each individual base
|
|
229 my @Mean_Read_Quals; # average qualities for each read
|
|
230
|
|
231 my $I = 1; # mio. counter for processing status message
|
|
232 while (<$Input_Fh>) { # FASTQ "usually" format uses four lines per sequence
|
|
233 chomp;
|
|
234 $Read_Count++;
|
|
235 $Line_Count++;
|
|
236
|
|
237 # status message
|
|
238 if ($Read_Count/1000000 == $I) {
|
|
239 print STDERR "$I Mio reads processed ...\r"; # carriage return to overwrite messages and not clutter STDERR
|
|
240 $I++;
|
|
241 }
|
|
242
|
|
243 # sequence identifier/read name (line 1)
|
|
244 # e.g.: @H108:287:D0M79ACXX:7:1101:5248:1997 1:N:0:TAGGCATGAGAGTAGA
|
|
245 # @ <instrument‐name>:<run ID>:<flowcell ID>:<lane‐number>:<tile‐number>:<x‐pos>:<y‐pos> <read number>:<is filtered>:<control number>:<barcode sequence>
|
|
246 # [Illumina, CASAVA v1.8 Changes, 05.01.2011]
|
|
247 my $seq_id = $_;
|
|
248 die "\n### Fatal error:\nThis read doesn't have a sequence identifier/read name according to FASTQ specs, it should begin with a '\@':\n$seq_id\n" if ($seq_id !~ /^@/);
|
|
249 $seq_id =~ s/^@(.+)/$1/; # remove '@' to make comparable to $qual_id
|
|
250
|
|
251 # DNA sequence of the read (line 2)
|
|
252 my $seq = read_line(); # subroutine
|
|
253 die "\n### Fatal error:\nRead '$seq_id' has a whitespace in its sequence, which is not allowed according to FASTQ specs:\n$seq\n" if ($seq =~ /\s+/);
|
|
254 base_count($seq, $seq_id); # subroutine
|
|
255 push(@Read_Lengths, length $seq);
|
|
256 $Total_Bases += length $seq;
|
|
257
|
|
258 # optional sequence ID of quality (line 3)
|
|
259 my $qual_id = read_line(); # subroutine
|
|
260 die "\n### Fatal error:\nThe optional sequence identifier/read name for the quality line of read '$seq_id' is not according to FASTQ specs, it should begin with a '+':\n$qual_id\n" if ($qual_id !~ /^\+/);
|
|
261 $qual_id =~ s/^\+(.*)/$1/; # if optional ID is present check if equal to $seq_id in line 1
|
|
262 die "\n### Fatal error:\nThe sequence identifier/read name of read '$seq_id' doesn't fit to the optional ID in the quality line:\n$qual_id\n" if ($qual_id && $qual_id ne $seq_id);
|
|
263
|
|
264 # ASCII quality values (line 4)
|
|
265 # The quality scores (Q; from Illumina 1.3 onwards: Phred [i.e. Sanger] Q = -10 log10(Pe); with Pe being estimated probability for base calling error) are transformed from integer to a single ASCII character for brevity so that a string can represent all of the quality scores within a read. ASCII offset of 33 (Illumina 1.8+; from Illumina 1.3+ was offset of 64). Q + 33 = ASCII. [Illumina, CASAVA v1.8 Changes, 05.01.2011]
|
|
266 my $qual = read_line(); # subroutine
|
|
267 die "\n### Fatal error:\nRead '$seq_id' has a whitespace in its quality values, which is not allowed according to FASTQ specs:\n$qual\n" if ($qual =~ /\s+/);
|
|
268 die "\n### Fatal error:\nRead '$seq_id' has a non-ASCII character in its quality values, which is not allowed according to FASTQ specs:\n$qual\n" if ($qual =~ /[^[:ascii]]/);
|
|
269 die "\n### Fatal error:\nThe quality line of read '$seq_id' doesn't have the same number of symbols as letters in the sequence:\n$seq\n$qual\n" if (length $qual != length $seq);
|
|
270
|
|
271 my $total_read_qual = 0;
|
|
272 foreach (split('', $qual)) {
|
|
273 my $phred_qual = ord($_)-$Qual_Ascii_Offset; # ord converts a character to its ASCII value (an integer), which has to be offset to get the Phred quality value
|
|
274 $Base_Quality_Counts{$phred_qual}++; # count individual base qualities
|
|
275 $total_read_qual += $phred_qual;
|
|
276 }
|
|
277
|
|
278 my $mean_read_qual = sprintf("%.2f", $total_read_qual/length $qual);
|
|
279 push(@Mean_Read_Quals, $mean_read_qual);
|
|
280
|
|
281 if ($Coverage_Limit && $Total_Bases >= $Coverage_Limit) {
|
|
282 print STDERR "\nReached coverage limit of '$Coverage_Limit bp' at read number '$Read_Count' with '$Total_Bases bp'!\n";
|
|
283 last;
|
|
284 }
|
|
285 last if ($Num_Read && $Read_Count == $Num_Read); # skip rest of reads if $num_read is reached
|
|
286 }
|
|
287 print "\n"; # get rid of the carriage return for status messages
|
|
288 close $Input_Fh;
|
|
289
|
|
290
|
|
291
|
|
292 ### Final sanity checks
|
|
293 die "\n### Fatal error:\nFASTQ file doesn't contain four lines for each read, which is an accepted convention in the FASTQ specs! If the sequence and the quality values are interrupted by line breaks please fix with Heng Li's 'seqtk' (https://github.com/lh3/seqtk):\nseqtk seq -l 0 infile.fastq > outfile.fastq\n" if ($Line_Count/4 != $Read_Count); # should be already covered as the parser assumes four lines per read (see above)
|
|
294 my $Total_Nucs = $A + $C + $G + $T + $U + $N;
|
|
295 die "\n### Fatal error:\nThe total number of bases '$Total_Nucs bp' is not equal to the read length sum '$Total_Bases bp'!\nAre there degenerate IUPAC bases (except for 'N') or non-nucleotide characters in the sequence (which is not allowed according to FASTQ specs)?\n" if ($Total_Nucs != $Total_Bases); # should be already covered in subroutine 'base_count' and checks in the parser above
|
|
296
|
|
297
|
|
298
|
|
299 ### Read length stats
|
|
300 my %Read_Lengths_Stats; # store stats for read lengths
|
|
301 stats_full(\@Read_Lengths, \%Read_Lengths_Stats); # subroutine for 'Statistics::Descriptive'
|
|
302 print "#Read stats:\n";
|
|
303 print_out_std("number_of_reads\t$Read_Count\n"); # subroutine
|
|
304 print_out_std("min_read_length\t$Read_Lengths_Stats{'min'}\n");
|
|
305 print_out_std("max_read_length\t$Read_Lengths_Stats{'max'}\n");
|
|
306 print_out_std("mean_read_length\t$Read_Lengths_Stats{'mean'}\n");
|
|
307 print_out_std("q1_read_length\t$Read_Lengths_Stats{'q1'}\n");
|
|
308 print_out_std("median_read_length\t$Read_Lengths_Stats{'median'}\n");
|
|
309 print_out_std("q3_read_length\t$Read_Lengths_Stats{'q3'}\n");
|
|
310 print_out_std("stdev_read_length\t$Read_Lengths_Stats{'sd'}\n");
|
|
311
|
|
312
|
|
313
|
|
314 ### Mean read Phred quality stats
|
|
315 my %Mean_Read_Quals_Stats; # store mean quality stats for reads
|
|
316 stats_full(\@Mean_Read_Quals, \%Mean_Read_Quals_Stats); # subroutine for 'Statistics::Descriptive'
|
|
317 print "#Read quality stats:\n";
|
|
318 print_out_std("min_read_qual\t$Mean_Read_Quals_Stats{'min'}\n"); # subroutine
|
|
319 print_out_std("max_read_qual\t$Mean_Read_Quals_Stats{'max'}\n");
|
|
320 print_out_std("mean_read_qual\t$Mean_Read_Quals_Stats{'mean'}\n");
|
|
321 print_out_std("q1_read_qual\t$Mean_Read_Quals_Stats{'q1'}\n");
|
|
322 print_out_std("median_read_qual\t$Mean_Read_Quals_Stats{'median'}\n");
|
|
323 print_out_std("q3_read_qual\t$Mean_Read_Quals_Stats{'q3'}\n");
|
|
324 print_out_std("stdev_read_qual\t$Mean_Read_Quals_Stats{'sd'}\n");
|
|
325
|
|
326
|
|
327
|
|
328 ### Total bases Phred quality stats
|
|
329 my %Base_Quals_Stats; # store quality stats for individual bases
|
|
330 stats_discrete_full(\%Base_Quality_Counts, \%Base_Quals_Stats); # subroutine for 'Statistics::Descriptive::Discrete' and 'Statistics::Descriptive::Weighted'
|
|
331 print "#Base quality stats:\n";
|
|
332 print_out_std("min_base_qual\t$Base_Quals_Stats{'min'}\n"); # subroutine
|
|
333 print_out_std("max_base_qual\t$Base_Quals_Stats{'max'}\n");
|
|
334 print_out_std("mean_base_qual\t$Base_Quals_Stats{'mean'}\n");
|
|
335 print_out_std("q1_base_qual\t$Base_Quals_Stats{'q1'}\n");
|
|
336 print_out_std("median_base_qual\t$Base_Quals_Stats{'median'}\n");
|
|
337 print_out_std("q3_base_qual\t$Base_Quals_Stats{'q3'}\n");
|
|
338 print_out_std("stdev_base_qual\t$Base_Quals_Stats{'sd'}\n");
|
|
339
|
|
340
|
|
341
|
|
342 ### Base stats
|
|
343 print "#Base count stats:\n";
|
|
344 print_out_std("number_of_bases\t$Total_Bases\n"); # subroutine
|
|
345 print_out_std("A_count\t$A\n");
|
|
346 print_out_std("C_count\t$C\n");
|
|
347 print_out_std("G_count\t$G\n");
|
|
348 print_out_std("T_count\t$T\n");
|
|
349 if ($U) {
|
|
350 print STDERR "Nucleotide 'U' found in the sequences, RNA?\n" if ($U);
|
|
351 print_out_std("U_count\t$U\n") if ($U);
|
|
352 }
|
|
353 print_out_std("N_count\t$N\n") if ($N);
|
|
354 my $GC_Content = sprintf ("%.2f", (($C + $G)/$Total_Bases)*100);
|
|
355 print_out_std("GC_content_[%]\t$GC_Content\n");
|
|
356 close Output_Fh if ($Output_File);
|
|
357
|
|
358
|
|
359 exit;
|
|
360
|
|
361 #############
|
|
362 #Subroutines#
|
|
363 #############
|
|
364
|
|
365 ### Subroutine to count bases
|
|
366 sub base_count {
|
|
367 my ($seq, $seq_id) = @_;
|
|
368 die "\n### Fatal error:\nRead '$seq_id' has a IUPAC degenerate base (except for 'N') or non-nucleotide character in its sequence, which is not allowed according to FASTQ specs:\n$seq\n" if ($seq =~ /[^acgtun]/i);
|
|
369 $A += ($seq =~ tr/[aA]//);
|
|
370 $C += ($seq =~ tr/[cC]//);
|
|
371 $G += ($seq =~ tr/[gG]//);
|
|
372 $T += ($seq =~ tr/[tT]//);
|
|
373 $U += ($seq =~ tr/[uU]//);
|
|
374 $N += ($seq =~ tr/[nN]//);
|
|
375 return 1;
|
|
376 }
|
|
377
|
|
378
|
|
379
|
|
380 ### Subroutine to test for file existence and give warning to STDERR
|
|
381 sub file_exist {
|
|
382 my $file = shift;
|
|
383 if (-e $file) {
|
|
384 warn "\nThe result file '$file' exists already and will be overwritten!\n\n";
|
|
385 return 1;
|
|
386 }
|
|
387 return 0;
|
|
388 }
|
|
389
|
|
390
|
|
391
|
|
392 ### Subroutine to read in a line from input, chomp, and count it
|
|
393 sub read_line {
|
|
394 my $line = <$Input_Fh>;
|
|
395 $Line_Count++;
|
|
396 chomp $line;
|
|
397 return $line;
|
|
398 }
|
|
399
|
|
400
|
|
401
|
|
402 ### Subroutine to calculate stats with modules 'Statistics::Descriptive::Discrete' and 'Statistics::Descriptive::Weighted' for large data sets, which overburden RAM and/or module 'Statistics::Descriptive'
|
|
403 sub stats_discrete_full {
|
|
404 my ($data_hash_ref, $hash_stat_ref) = @_;
|
|
405
|
|
406 # convert data to needed formats
|
|
407 my @discrete_tuple; # for 'Statistics::Descriptive::Discrete'
|
|
408 my (@values, @weights); # for 'Statistics::Descriptive::Weighted'
|
|
409 foreach (sort {$a <=> $b} keys %$data_hash_ref) { # de-reference hash ref
|
|
410 push(@discrete_tuple, ($_, $data_hash_ref->{$_})); # expects first value then weight, de-reference
|
|
411 push(@values, $_);
|
|
412 push(@weights, $data_hash_ref->{$_});
|
|
413 }
|
|
414
|
|
415 my $stat_discrete = Statistics::Descriptive::Discrete->new();
|
|
416 $stat_discrete->add_data_tuple(@discrete_tuple);
|
|
417 $hash_stat_ref->{'mean'} = sprintf("%.2f", $stat_discrete->mean()); # rounded to two decimal places
|
|
418 $hash_stat_ref->{'median'} = sprintf("%.2f", $stat_discrete->median());
|
|
419 $hash_stat_ref->{'sd'} = sprintf("%.2f", $stat_discrete->standard_deviation());
|
|
420 $hash_stat_ref->{'min'} = $stat_discrete->min();
|
|
421 $hash_stat_ref->{'max'} = $stat_discrete->max();
|
|
422
|
|
423 # quantile approximations only implemented in 'Statistics::Descriptive::Weighted' not in 'Statistics::Descriptive::Discrete'
|
|
424 my $stat_weighted = Statistics::Descriptive::Weighted::Full->new();
|
|
425 $stat_weighted->add_data(\@values, \@weights); # add data as array refs
|
|
426 $hash_stat_ref->{'q1'} = sprintf("%.2f", $stat_weighted->quantile(.25)); # Q1, first quartile (25th percentile)
|
|
427 $hash_stat_ref->{'q3'} = sprintf("%.2f", $stat_weighted->quantile(.75)); # Q3, third quartile (75th percentile)
|
|
428 return 1;
|
|
429 }
|
|
430
|
|
431
|
|
432
|
|
433 ### Subroutine to calculate stats with module 'Statistics::Descriptive'
|
|
434 sub stats_full {
|
|
435 my ($data_array_ref, $hash_stat_ref) = @_;
|
|
436 my $stat = Statistics::Descriptive::Full->new();
|
|
437 $stat->add_data(@$data_array_ref); # de-reference array ref
|
|
438 $hash_stat_ref->{'mean'} = sprintf("%.2f", $stat->mean());
|
|
439 $hash_stat_ref->{'median'} = sprintf("%.2f", $stat->median());
|
|
440 $hash_stat_ref->{'sd'} = sprintf("%.2f", $stat->standard_deviation());
|
|
441 $hash_stat_ref->{'min'} = $stat->min();
|
|
442 $hash_stat_ref->{'max'} = $stat->max();
|
|
443 $hash_stat_ref->{'q1'} = sprintf("%.2f", $stat->quantile(1)); # Q1, first quartile (25th percentile)
|
|
444 $hash_stat_ref->{'q3'} = sprintf("%.2f", $stat->quantile(3)); # Q3, third quartile (75th percentile)
|
|
445 # $stat->clear(); # remove stats from the module for next round; not needed because of 'new()' initialisation at begin of subroutine
|
|
446 return 1;
|
|
447 }
|
|
448
|
|
449
|
|
450
|
|
451 ### Subroutine to print to STDOUT and optionally to output file
|
|
452 sub print_out_std {
|
|
453 print "@_"; # print line to STDOUT
|
|
454 print $Output_Fh "@_" if ($Output_File); # additionally, print to optional output file
|
|
455 return 1;
|
|
456 }
|