0
+ − 1 package Statistics::Distributions;
+ − 2
+ − 3 use strict;
+ − 4 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
+ − 5 use constant PI => 3.1415926536;
+ − 6 use constant SIGNIFICANT => 5; # number of significant digits to be returned
+ − 7
+ − 8 require Exporter;
+ − 9
+ − 10 @ISA = qw(Exporter AutoLoader);
+ − 11 # Items to export into callers namespace by default. Note: do not export
+ − 12 # names by default without a very good reason. Use EXPORT_OK instead.
+ − 13 # Do not simply export all your public functions/methods/constants.
+ − 14 @EXPORT_OK = qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
+ − 15 $VERSION = '1.02';
+ − 16
+ − 17 # Preloaded methods go here.
+ − 18
+ − 19 sub chisqrdistr { # Percentage points X^2(x^2,n)
+ − 20 my ($n, $p) = @_;
+ − 21 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
+ − 22 die "Invalid n: $n\n"; # degree of freedom
+ − 23 }
+ − 24 if ($p <= 0 || $p > 1) {
+ − 25 die "Invalid p: $p\n";
+ − 26 }
+ − 27 return precision_string(_subchisqr($n, $p));
+ − 28 }
+ − 29
+ − 30 sub udistr { # Percentage points N(0,1^2)
+ − 31 my ($p) = (@_);
+ − 32 if ($p > 1 || $p <= 0) {
+ − 33 die "Invalid p: $p\n";
+ − 34 }
+ − 35 return precision_string(_subu($p));
+ − 36 }
+ − 37
+ − 38 sub tdistr { # Percentage points t(x,n)
+ − 39 my ($n, $p) = @_;
+ − 40 if ($n <= 0 || abs($n) - abs(int($n)) != 0) {
+ − 41 die "Invalid n: $n\n";
+ − 42 }
+ − 43 if ($p <= 0 || $p >= 1) {
+ − 44 die "Invalid p: $p\n";
+ − 45 }
+ − 46 return precision_string(_subt($n, $p));
+ − 47 }
+ − 48
+ − 49 sub fdistr { # Percentage points F(x,n1,n2)
+ − 50 my ($n, $m, $p) = @_;
+ − 51 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
+ − 52 die "Invalid n: $n\n"; # first degree of freedom
+ − 53 }
+ − 54 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
+ − 55 die "Invalid m: $m\n"; # second degree of freedom
+ − 56 }
+ − 57 if (($p<=0) || ($p>1)) {
+ − 58 die "Invalid p: $p\n";
+ − 59 }
+ − 60 return precision_string(_subf($n, $m, $p));
+ − 61 }
+ − 62
+ − 63 sub uprob { # Upper probability N(0,1^2)
+ − 64 my ($x) = @_;
+ − 65 return precision_string(_subuprob($x));
+ − 66 }
+ − 67
+ − 68 sub chisqrprob { # Upper probability X^2(x^2,n)
+ − 69 my ($n,$x) = @_;
+ − 70 if (($n <= 0) || ((abs($n) - (abs(int($n)))) != 0)) {
+ − 71 die "Invalid n: $n\n"; # degree of freedom
+ − 72 }
+ − 73 return precision_string(_subchisqrprob($n, $x));
+ − 74 }
+ − 75
+ − 76 sub tprob { # Upper probability t(x,n)
+ − 77 my ($n, $x) = @_;
+ − 78 if (($n <= 0) || ((abs($n) - abs(int($n))) !=0)) {
+ − 79 die "Invalid n: $n\n"; # degree of freedom
+ − 80 }
+ − 81 return precision_string(_subtprob($n, $x));
+ − 82 }
+ − 83
+ − 84 sub fprob { # Upper probability F(x,n1,n2)
+ − 85 my ($n, $m, $x) = @_;
+ − 86 if (($n<=0) || ((abs($n)-(abs(int($n))))!=0)) {
+ − 87 die "Invalid n: $n\n"; # first degree of freedom
+ − 88 }
+ − 89 if (($m<=0) || ((abs($m)-(abs(int($m))))!=0)) {
+ − 90 die "Invalid m: $m\n"; # second degree of freedom
+ − 91 }
+ − 92 return precision_string(_subfprob($n, $m, $x));
+ − 93 }
+ − 94
+ − 95
+ − 96 sub _subfprob {
+ − 97 my ($n, $m, $x) = @_;
+ − 98 my $p;
+ − 99
+ − 100 if ($x<=0) {
+ − 101 $p=1;
+ − 102 } elsif ($m % 2 == 0) {
+ − 103 my $z = $m / ($m + $n * $x);
+ − 104 my $a = 1;
+ − 105 for (my $i = $m - 2; $i >= 2; $i -= 2) {
+ − 106 $a = 1 + ($n + $i - 2) / $i * $z * $a;
+ − 107 }
+ − 108 $p = 1 - ((1 - $z) ** ($n / 2) * $a);
+ − 109 } elsif ($n % 2 == 0) {
+ − 110 my $z = $n * $x / ($m + $n * $x);
+ − 111 my $a = 1;
+ − 112 for (my $i = $n - 2; $i >= 2; $i -= 2) {
+ − 113 $a = 1 + ($m + $i - 2) / $i * $z * $a;
+ − 114 }
+ − 115 $p = (1 - $z) ** ($m / 2) * $a;
+ − 116 } else {
+ − 117 my $y = atan2(sqrt($n * $x / $m), 1);
+ − 118 my $z = sin($y) ** 2;
+ − 119 my $a = ($n == 1) ? 0 : 1;
+ − 120 for (my $i = $n - 2; $i >= 3; $i -= 2) {
+ − 121 $a = 1 + ($m + $i - 2) / $i * $z * $a;
+ − 122 }
+ − 123 my $b = PI;
+ − 124 for (my $i = 2; $i <= $m - 1; $i += 2) {
+ − 125 $b *= ($i - 1) / $i;
+ − 126 }
+ − 127 my $p1 = 2 / $b * sin($y) * cos($y) ** $m * $a;
+ − 128
+ − 129 $z = cos($y) ** 2;
+ − 130 $a = ($m == 1) ? 0 : 1;
+ − 131 for (my $i = $m-2; $i >= 3; $i -= 2) {
+ − 132 $a = 1 + ($i - 1) / $i * $z * $a;
+ − 133 }
+ − 134 $p = max(0, $p1 + 1 - 2 * $y / PI
+ − 135 - 2 / PI * sin($y) * cos($y) * $a);
+ − 136 }
+ − 137 return $p;
+ − 138 }
+ − 139
+ − 140
+ − 141 sub _subchisqrprob {
+ − 142 my ($n,$x) = @_;
+ − 143 my $p;
+ − 144
+ − 145 if ($x <= 0) {
+ − 146 $p = 1;
+ − 147 } elsif ($n > 100) {
+ − 148 $p = _subuprob((($x / $n) ** (1/3)
+ − 149 - (1 - 2/9/$n)) / sqrt(2/9/$n));
+ − 150 } elsif ($x > 400) {
+ − 151 $p = 0;
+ − 152 } else {
+ − 153 my ($a, $i, $i1);
+ − 154 if (($n % 2) != 0) {
+ − 155 $p = 2 * _subuprob(sqrt($x));
+ − 156 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
+ − 157 $i1 = 1;
+ − 158 } else {
+ − 159 $p = $a = exp(-$x/2);
+ − 160 $i1 = 2;
+ − 161 }
+ − 162
+ − 163 for ($i = $i1; $i <= ($n-2); $i += 2) {
+ − 164 $a *= $x / $i;
+ − 165 $p += $a;
+ − 166 }
+ − 167 }
+ − 168 return $p;
+ − 169 }
+ − 170
+ − 171 sub _subu {
+ − 172 my ($p) = @_;
+ − 173 my $y = -log(4 * $p * (1 - $p));
+ − 174 my $x = sqrt(
+ − 175 $y * (1.570796288
+ − 176 + $y * (.03706987906
+ − 177 + $y * (-.8364353589E-3
+ − 178 + $y *(-.2250947176E-3
+ − 179 + $y * (.6841218299E-5
+ − 180 + $y * (0.5824238515E-5
+ − 181 + $y * (-.104527497E-5
+ − 182 + $y * (.8360937017E-7
+ − 183 + $y * (-.3231081277E-8
+ − 184 + $y * (.3657763036E-10
+ − 185 + $y *.6936233982E-12)))))))))));
+ − 186 $x = -$x if ($p>.5);
+ − 187 return $x;
+ − 188 }
+ − 189
+ − 190 sub _subuprob {
+ − 191 my ($x) = @_;
+ − 192 my $p = 0; # if ($absx > 100)
+ − 193 my $absx = abs($x);
+ − 194
+ − 195 if ($absx < 1.9) {
+ − 196 $p = (1 +
+ − 197 $absx * (.049867347
+ − 198 + $absx * (.0211410061
+ − 199 + $absx * (.0032776263
+ − 200 + $absx * (.0000380036
+ − 201 + $absx * (.0000488906
+ − 202 + $absx * .000005383)))))) ** -16/2;
+ − 203 } elsif ($absx <= 100) {
+ − 204 for (my $i = 18; $i >= 1; $i--) {
+ − 205 $p = $i / ($absx + $p);
+ − 206 }
+ − 207 $p = exp(-.5 * $absx * $absx)
+ − 208 / sqrt(2 * PI) / ($absx + $p);
+ − 209 }
+ − 210
+ − 211 $p = 1 - $p if ($x<0);
+ − 212 return $p;
+ − 213 }
+ − 214
+ − 215
+ − 216 sub _subt {
+ − 217 my ($n, $p) = @_;
+ − 218
+ − 219 if ($p >= 1 || $p <= 0) {
+ − 220 die "Invalid p: $p\n";
+ − 221 }
+ − 222
+ − 223 if ($p == 0.5) {
+ − 224 return 0;
+ − 225 } elsif ($p < 0.5) {
+ − 226 return - _subt($n, 1 - $p);
+ − 227 }
+ − 228
+ − 229 my $u = _subu($p);
+ − 230 my $u2 = $u ** 2;
+ − 231
+ − 232 my $a = ($u2 + 1) / 4;
+ − 233 my $b = ((5 * $u2 + 16) * $u2 + 3) / 96;
+ − 234 my $c = (((3 * $u2 + 19) * $u2 + 17) * $u2 - 15) / 384;
+ − 235 my $d = ((((79 * $u2 + 776) * $u2 + 1482) * $u2 - 1920) * $u2 - 945)
+ − 236 / 92160;
+ − 237 my $e = (((((27 * $u2 + 339) * $u2 + 930) * $u2 - 1782) * $u2 - 765) * $u2
+ − 238 + 17955) / 368640;
+ − 239
+ − 240 my $x = $u * (1 + ($a + ($b + ($c + ($d + $e / $n) / $n) / $n) / $n) / $n);
+ − 241
+ − 242 if ($n <= log10($p) ** 2 + 3) {
+ − 243 my $round;
+ − 244 do {
+ − 245 my $p1 = _subtprob($n, $x);
+ − 246 my $n1 = $n + 1;
+ − 247 my $delta = ($p1 - $p)
+ − 248 / exp(($n1 * log($n1 / ($n + $x * $x))
+ − 249 + log($n/$n1/2/PI) - 1
+ − 250 + (1/$n1 - 1/$n) / 6) / 2);
+ − 251 $x += $delta;
+ − 252 $round = sprintf("%.".abs(int(log10(abs $x)-4))."f",$delta);
+ − 253 } while (($x) && ($round != 0));
+ − 254 }
+ − 255 return $x;
+ − 256 }
+ − 257
+ − 258 sub _subtprob {
+ − 259 my ($n, $x) = @_;
+ − 260
+ − 261 my ($a,$b);
+ − 262 my $w = atan2($x / sqrt($n), 1);
+ − 263 my $z = cos($w) ** 2;
+ − 264 my $y = 1;
+ − 265
+ − 266 for (my $i = $n-2; $i >= 2; $i -= 2) {
+ − 267 $y = 1 + ($i-1) / $i * $z * $y;
+ − 268 }
+ − 269
+ − 270 if ($n % 2 == 0) {
+ − 271 $a = sin($w)/2;
+ − 272 $b = .5;
+ − 273 } else {
+ − 274 $a = ($n == 1) ? 0 : sin($w)*cos($w)/PI;
+ − 275 $b= .5 + $w/PI;
+ − 276 }
+ − 277 return max(0, 1 - $b - $a * $y);
+ − 278 }
+ − 279
+ − 280 sub _subf {
+ − 281 my ($n, $m, $p) = @_;
+ − 282 my $x;
+ − 283
+ − 284 if ($p >= 1 || $p <= 0) {
+ − 285 die "Invalid p: $p\n";
+ − 286 }
+ − 287
+ − 288 if ($p == 1) {
+ − 289 $x = 0;
+ − 290 } elsif ($m == 1) {
+ − 291 $x = 1 / (_subt($n, 0.5 - $p / 2) ** 2);
+ − 292 } elsif ($n == 1) {
+ − 293 $x = _subt($m, $p/2) ** 2;
+ − 294 } elsif ($m == 2) {
+ − 295 my $u = _subchisqr($m, 1 - $p);
+ − 296 my $a = $m - 2;
+ − 297 $x = 1 / ($u / $m * (1 +
+ − 298 (($u - $a) / 2 +
+ − 299 (((4 * $u - 11 * $a) * $u + $a * (7 * $m - 10)) / 24 +
+ − 300 (((2 * $u - 10 * $a) * $u + $a * (17 * $m - 26)) * $u
+ − 301 - $a * $a * (9 * $m - 6)
+ − 302 )/48/$n
+ − 303 )/$n
+ − 304 )/$n));
+ − 305 } elsif ($n > $m) {
+ − 306 $x = 1 / _subf2($m, $n, 1 - $p)
+ − 307 } else {
+ − 308 $x = _subf2($n, $m, $p)
+ − 309 }
+ − 310 return $x;
+ − 311 }
+ − 312
+ − 313 sub _subf2 {
+ − 314 my ($n, $m, $p) = @_;
+ − 315 my $u = _subchisqr($n, $p);
+ − 316 my $n2 = $n - 2;
+ − 317 my $x = $u / $n *
+ − 318 (1 +
+ − 319 (($u - $n2) / 2 +
+ − 320 (((4 * $u - 11 * $n2) * $u + $n2 * (7 * $n - 10)) / 24 +
+ − 321 (((2 * $u - 10 * $n2) * $u + $n2 * (17 * $n - 26)) * $u
+ − 322 - $n2 * $n2 * (9 * $n - 6)) / 48 / $m) / $m) / $m);
+ − 323 my $delta;
+ − 324 do {
+ − 325 my $z = exp(
+ − 326 (($n+$m) * log(($n+$m) / ($n * $x + $m))
+ − 327 + ($n - 2) * log($x)
+ − 328 + log($n * $m / ($n+$m))
+ − 329 - log(4 * PI)
+ − 330 - (1/$n + 1/$m - 1/($n+$m))/6
+ − 331 )/2);
+ − 332 $delta = (_subfprob($n, $m, $x) - $p) / $z;
+ − 333 $x += $delta;
+ − 334 } while (abs($delta)>3e-4);
+ − 335 return $x;
+ − 336 }
+ − 337
+ − 338 sub _subchisqr {
+ − 339 my ($n, $p) = @_;
+ − 340 my $x;
+ − 341
+ − 342 if (($p > 1) || ($p <= 0)) {
+ − 343 die "Invalid p: $p\n";
+ − 344 } elsif ($p == 1){
+ − 345 $x = 0;
+ − 346 } elsif ($n == 1) {
+ − 347 $x = _subu($p / 2) ** 2;
+ − 348 } elsif ($n == 2) {
+ − 349 $x = -2 * log($p);
+ − 350 } else {
+ − 351 my $u = _subu($p);
+ − 352 my $u2 = $u * $u;
+ − 353
+ − 354 $x = max(0, $n + sqrt(2 * $n) * $u
+ − 355 + 2/3 * ($u2 - 1)
+ − 356 + $u * ($u2 - 7) / 9 / sqrt(2 * $n)
+ − 357 - 2/405 / $n * ($u2 * (3 *$u2 + 7) - 16));
+ − 358
+ − 359 if ($n <= 100) {
+ − 360 my ($x0, $p1, $z);
+ − 361 do {
+ − 362 $x0 = $x;
+ − 363 if ($x < 0) {
+ − 364 $p1 = 1;
+ − 365 } elsif ($n>100) {
+ − 366 $p1 = _subuprob((($x / $n)**(1/3) - (1 - 2/9/$n))
+ − 367 / sqrt(2/9/$n));
+ − 368 } elsif ($x>400) {
+ − 369 $p1 = 0;
+ − 370 } else {
+ − 371 my ($i0, $a);
+ − 372 if (($n % 2) != 0) {
+ − 373 $p1 = 2 * _subuprob(sqrt($x));
+ − 374 $a = sqrt(2/PI) * exp(-$x/2) / sqrt($x);
+ − 375 $i0 = 1;
+ − 376 } else {
+ − 377 $p1 = $a = exp(-$x/2);
+ − 378 $i0 = 2;
+ − 379 }
+ − 380
+ − 381 for (my $i = $i0; $i <= $n-2; $i += 2) {
+ − 382 $a *= $x / $i;
+ − 383 $p1 += $a;
+ − 384 }
+ − 385 }
+ − 386 $z = exp((($n-1) * log($x/$n) - log(4*PI*$x)
+ − 387 + $n - $x - 1/$n/6) / 2);
+ − 388 $x += ($p1 - $p) / $z;
+ − 389 $x = sprintf("%.5f", $x);
+ − 390 } while (($n < 31) && (abs($x0 - $x) > 1e-4));
+ − 391 }
+ − 392 }
+ − 393 return $x;
+ − 394 }
+ − 395
+ − 396 sub log10 {
+ − 397 my $n = shift;
+ − 398 return log($n) / log(10);
+ − 399 }
+ − 400
+ − 401 sub max {
+ − 402 my $max = shift;
+ − 403 my $next;
+ − 404 while (@_) {
+ − 405 $next = shift;
+ − 406 $max = $next if ($next > $max);
+ − 407 }
+ − 408 return $max;
+ − 409 }
+ − 410
+ − 411 sub min {
+ − 412 my $min = shift;
+ − 413 my $next;
+ − 414 while (@_) {
+ − 415 $next = shift;
+ − 416 $min = $next if ($next < $min);
+ − 417 }
+ − 418 return $min;
+ − 419 }
+ − 420
+ − 421 sub precision {
+ − 422 my ($x) = @_;
+ − 423 return abs int(log10(abs $x) - SIGNIFICANT);
+ − 424 }
+ − 425
+ − 426 sub precision_string {
+ − 427 my ($x) = @_;
+ − 428 if ($x) {
+ − 429 return sprintf "%." . precision($x) . "f", $x;
+ − 430 } else {
+ − 431 return "0";
+ − 432 }
+ − 433 }
+ − 434
+ − 435
+ − 436 # Autoload methods go after =cut, and are processed by the autosplit program.
+ − 437
+ − 438 1;
+ − 439 __END__
+ − 440 # Below is the stub of documentation for your module. You better edit it!
+ − 441
+ − 442 =head1 NAME
+ − 443
+ − 444 Statistics::Distributions - Perl module for calculating critical values and upper probabilities of common statistical distributions
+ − 445
+ − 446 =head1 SYNOPSIS
+ − 447
+ − 448 use Statistics::Distributions;
+ − 449
+ − 450 $chis=Statistics::Distributions::chisqrdistr (2,.05);
+ − 451 print "Chi-squared-crit (2 degrees of freedom, 95th percentile "
+ − 452 ."= 0.05 level) = $chis\n";
+ − 453
+ − 454 $u=Statistics::Distributions::udistr (.05);
+ − 455 print "u-crit (95th percentile = 0.05 level) = $u\n";
+ − 456
+ − 457 $t=Statistics::Distributions::tdistr (1,.005);
+ − 458 print "t-crit (1 degree of freedom, 99.5th percentile = 0.005 level) "
+ − 459 ."= $t\n";
+ − 460
+ − 461 $f=Statistics::Distributions::fdistr (1,3,.01);
+ − 462 print "F-crit (1 degree of freedom in numerator, 3 degrees of freedom "
+ − 463 ."in denominator, 99th percentile = 0.01 level) = $f\n";
+ − 464
+ − 465 $uprob=Statistics::Distributions::uprob (-0.85);
+ − 466 print "upper probability of the u distribution (u = -0.85): Q(u) "
+ − 467 ."= 1-G(u) = $uprob\n";
+ − 468
+ − 469 $chisprob=Statistics::Distributions::chisqrprob (3,6.25);
+ − 470 print "upper probability of the chi-square distribution (3 degrees "
+ − 471 ."of freedom, chi-squared = 6.25): Q = 1-G = $chisprob\n";
+ − 472
+ − 473 $tprob=Statistics::Distributions::tprob (3,6.251);
+ − 474 print "upper probability of the t distribution (3 degrees of "
+ − 475 ."freedom, t = 6.251): Q = 1-G = $tprob\n";
+ − 476
+ − 477 $fprob=Statistics::Distributions::fprob (3,5,.625);
+ − 478 print "upper probability of the F distribution (3 degrees of freedom "
+ − 479 ."in numerator, 5 degrees of freedom in denominator, F = 6.25): "
+ − 480 ."Q = 1-G = $fprob\n";
+ − 481
+ − 482 =head1 DESCRIPTION
+ − 483
+ − 484 This Perl module calculates percentage points (5 significant digits) of the u (standard normal) distribution, the student's t distribution, the chi-square distribution and the F distribution. It can also calculate the upper probability (5 significant digits) of the u (standard normal), the chi-square, the t and the F distribution.
+ − 485 These critical values are needed to perform statistical tests, like the u test, the t test, the F test and the chi-squared test, and to calculate confidence intervals.
+ − 486
+ − 487 If you are interested in more precise algorithms you could look at:
+ − 488 StatLib: http://lib.stat.cmu.edu/apstat/ ;
+ − 489 Applied Statistics Algorithms by Griffiths, P. and Hill, I.D., Ellis Horwood: Chichester (1985)
+ − 490
+ − 491 =head1 BUGS
+ − 492
+ − 493 This final version 1.02 has been released after more than one year without a bug report on the previous version 0.07.
+ − 494 Nevertheless, if you find any bugs or oddities, please do inform the author.
+ − 495
+ − 496 =head1 INSTALLATION
+ − 497
+ − 498 See perlmodinstall for information and options on installing Perl modules.
+ − 499
+ − 500 =head1 AVAILABILITY
+ − 501
+ − 502 The latest version of this module is available from the Distribution Perl Archive Network (CPAN). Please visit http://www.cpan.org/ to find a CPAN site near you or see http://www.cpan.org/authors/id/M/MI/MIKEK/ .
+ − 503
+ − 504 =head1 AUTHOR
+ − 505
+ − 506 Michael Kospach <mike.perl@gmx.at>
+ − 507
+ − 508 Nice formating, simplification and bug repair by Matthias Trautner Kromann <mtk@id.cbs.dk>
+ − 509
+ − 510 =head1 COPYRIGHT
+ − 511
+ − 512 Copyright 2003 Michael Kospach. All rights reserved.
+ − 513
+ − 514 This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+ − 515
+ − 516 =head1 SEE ALSO
+ − 517
+ − 518 Statistics::ChiSquare, Statistics::Table::t, Statistics::Table::F, perl(1).
+ − 519
+ − 520 =cut
+ − 521
+ − 522
+ − 523
+ − 524
+ − 525
+ − 526
+ − 527
+ − 528
+ − 529