#-*-Mode: text;-*- eval "\$opt_$1=\$2" while $ARGV[0] =~ /^(\w+)=(.*)/ && shift; #Input file must be organized independent var(s) first, then dependent one. #Each line is one sample: a bunch of numbers (for each variable) separated # by white space. #Optional line of form '#varnames: name1, name2, ..., nameN' names the vars #Default is X1, X2, ...., Xn, Y $nVars = 0; $sampleSize = 0; #Appendix B, p. 788. First row is n1 header, else 1st column is n2 header. $FtableData05 = <<'END_OF_TABLE'; 1 2 3 4 5 6 7 8 9 10 11 12 14 16 20 24 30 40 50 75 100 200 500 INF 1: 161 200 216 225 230 234 237 239 241 242 243 244 245 246 248 249 250 251 252 253 253 254 254 254 2: 18.51 19.00 19.16 19.25 19.30 19.33 19.36 19.37 19.38 19.39 19.40 19.41 19.42 19.43 19.44 19.45 19.46 19.47 19.47 19.48 19.49 19.49 19.50 19.50 3: 10.13 9.55 9.28 9.12 9.01 8.94 8.88 8.84 8.81 8.78 8.76 8.74 8.71 8.69 8.66 8.64 8.62 8.60 8.58 8.57 8.56 8.54 8.54 8.53 4: 7.71 6.94 6.59 6.39 6.26 6.16 6.09 6.04 6.00 5.96 5.93 5.91 5.87 5.84 5.80 5.77 5.74 5.71 5.70 5.68 5.66 5.65 5.64 5.63 5: 6.61 5.79 5.41 5.19 5.05 4.95 4.88 4.82 4.78 4.74 4.70 4.68 4.64 4.60 4.56 4.53 4.50 4.46 4.44 4.42 4.40 4.38 4.37 4.36 6: 5.99 5.14 4.76 4.53 4.39 4.28 4.21 4.15 4.10 4.06 4.03 4.00 3.96 3.92 3.87 3.84 3.81 3.77 3.75 3.72 3.71 3.69 3.68 3.67 7: 5.59 4.74 4.35 4.12 3.97 3.87 3.79 3.73 3.68 3.63 3.60 3.57 3.52 3.49 3.44 3.41 3.38 3.34 3.32 3.29 3.28 3.25 3.24 3.23 8: 5.32 4.46 4.07 3.84 3.69 3.58 3.50 3.44 3.39 3.34 3.31 3.28 3.23 3.20 3.15 3.12 3.08 3.05 3.03 3.00 2.98 2.96 2.94 2.93 9: 5.12 4.26 3.86 3.63 3.48 3.37 3.29 3.23 3.18 3.13 3.10 3.07 3.02 2.98 2.93 2.90 2.86 2.82 2.80 2.77 2.76 2.73 2.72 2.71 10: 4.96 4.10 3.71 3.48 3.33 3.22 3.14 3.07 3.02 2.97 2.94 2.91 2.86 2.82 2.77 2.74 2.70 2.67 2.64 2.61 2.59 2.56 2.55 2.54 11: 4.84 3.98 3.59 3.36 3.20 3.09 3.01 2.95 2.90 2.86 2.82 2.79 2.74 2.70 2.65 2.61 2.57 2.53 2.50 2.47 2.45 2.42 2.41 2.40 12: 4.75 3.88 3.49 3.26 3.11 3.00 2.92 2.85 2.80 2.76 2.72 2.69 2.64 2.60 2.54 2.50 2.46 2.42 2.40 2.36 2.35 2.32 2.31 2.30 13: 4.67 3.80 3.41 3.18 3.02 2.92 2.84 2.77 2.72 2.67 2.63 2.60 2.55 2.51 2.46 2.42 2.38 2.34 2.32 2.28 2.26 2.24 2.22 2.21 14: 4.60 3.74 3.34 3.11 2.96 2.85 2.77 2.70 2.65 2.60 2.56 2.53 2.48 2.44 2.39 2.35 2.31 2.27 2.24 2.21 2.19 2.16 2.14 2.13 15: 4.54 3.68 3.29 3.06 2.90 2.79 2.70 2.64 2.59 2.55 2.51 2.48 2.43 2.39 2.33 2.29 2.25 2.21 2.18 2.15 2.12 2.10 2.08 2.07 16: 4.49 3.63 3.24 3.01 2.85 2.74 2.66 2.59 2.54 2.49 2.45 2.42 2.37 2.33 2.28 2.24 2.20 2.16 2.13 2.09 2.07 2.04 2.02 2.01 17: 4.45 3.59 3.20 2.96 2.81 2.70 2.62 2.55 2.50 2.45 2.41 2.38 2.33 2.29 2.23 2.19 2.15 2.11 2.08 2.04 2.02 1.99 1.97 1.96 18: 4.41 3.55 3.16 2.93 2.77 2.66 2.58 2.51 2.46 2.41 2.37 2.34 2.29 2.25 2.19 2.15 2.11 2.07 2.04 2.00 1.98 1.95 1.93 1.92 19: 4.38 3.52 3.13 2.90 2.74 2.63 2.55 2.48 2.43 2.38 2.34 2.31 2.26 2.21 2.15 2.11 2.07 2.02 2.00 1.96 1.94 1.91 1.90 1.88 20: 4.35 3.49 3.10 2.87 2.71 2.60 2.52 2.45 2.40 2.35 2.31 2.28 2.23 2.18 2.12 2.08 2.04 1.99 1.96 1.92 1.90 1.87 1.85 1.84 21: 4.32 3.47 3.07 2.84 2.68 2.57 2.49 2.42 2.37 2.32 2.28 2.25 2.20 2.15 2.09 2.05 2.00 1.96 1.93 1.89 1.87 1.84 1.82 1.81 22: 4.30 3.44 3.05 2.82 2.66 2.55 2.47 2.40 2.35 2.30 2.26 2.23 2.18 2.13 2.07 2.03 1.98 1.93 1.91 1.87 1.84 1.81 1.80 1.78 23: 4.28 3.42 3.03 2.80 2.64 2.53 2.45 2.38 2.32 2.28 2.24 2.20 2.14 2.10 2.04 2.00 1.96 1.91 1.88 1.84 1.82 1.79 1.77 1.76 24: 4.26 3.40 3.01 2.78 2.62 2.51 2.43 2.36 2.30 2.26 2.22 2.18 2.13 2.09 2.02 1.98 1.94 1.89 1.86 1.82 1.80 1.76 1.74 1.73 25: 4.24 3.38 2.99 2.76 2.60 2.49 2.41 2.34 2.28 2.24 2.20 2.16 2.11 2.06 2.00 1.96 1.92 1.87 1.84 1.80 1.77 1.74 1.72 1.71 26: 4.22 3.37 2.98 2.74 2.59 2.47 2.39 2.32 2.27 2.22 2.18 2.15 2.10 2.05 1.99 1.95 1.90 1.85 1.82 1.78 1.76 1.72 1.70 1.69 27: 4.21 3.35 2.96 2.73 2.57 2.46 2.37 2.30 2.25 2.20 2.16 2.13 2.08 2.03 1.97 1.93 1.88 1.84 1.80 1.76 1.74 1.71 1.68 1.67 28: 4.20 3.34 2.95 2.71 2.56 2.44 2.36 2.29 2.24 2.19 2.15 2.12 2.06 2.02 1.96 1.91 1.87 1.81 1.78 1.75 1.72 1.69 1.67 1.65 29: 4.18 3.33 2.93 2.70 2.54 2.43 2.35 2.28 2.22 2.18 2.14 2.10 2.05 2.00 1.94 1.90 1.85 1.80 1.77 1.73 1.71 1.68 1.65 1.64 30: 4.17 3.32 2.92 2.69 2.53 2.42 2.34 2.27 2.21 2.16 2.12 2.09 2.04 1.99 1.93 1.89 1.84 1.79 1.76 1.72 1.69 1.66 1.64 1.62 32: 4.15 3.30 2.90 2.67 2.51 2.40 2.32 2.25 2.19 2.14 2.10 2.07 2.02 1.97 1.91 1.86 1.82 1.76 1.74 1.69 1.67 1.64 1.61 1.59 34: 4.13 3.28 2.88 2.65 2.49 2.38 2.30 2.23 2.17 2.12 2.08 2.05 2.00 1.95 1.89 1.84 1.80 1.74 1.71 1.67 1.64 1.61 1.59 1.57 36: 4.11 3.26 2.86 2.63 2.48 2.36 2.28 2.21 2.15 2.10 2.06 2.03 1.98 1.93 1.87 1.82 1.78 1.72 1.69 1.65 1.62 1.59 1.56 1.55 38: 4.10 3.25 2.85 2.62 2.46 2.35 2.26 2.19 2.14 2.09 2.05 2.02 1.96 1.92 1.85 1.80 1.76 1.71 1.67 1.63 1.60 1.57 1.54 1.53 40: 4.08 3.23 2.84 2.61 2.45 2.34 2.25 2.18 2.12 2.07 2.04 2.00 1.95 1.90 1.84 1.79 1.74 1.69 1.66 1.61 1.59 1.55 1.53 1.51 42: 4.07 3.22 2.83 2.59 2.44 2.32 2.24 2.17 2.11 2.06 2.02 1.99 1.94 1.89 1.82 1.78 1.73 1.68 1.64 1.60 1.57 1.54 1.51 1.49 44: 4.06 3.21 2.82 2.58 2.43 2.31 2.23 2.16 2.10 2.05 2.01 1.98 1.92 1.88 1.81 1.76 1.72 1.66 1.63 1.58 1.56 1.52 1.50 1.48 46: 4.05 3.20 2.81 2.57 2.42 2.30 2.22 2.14 2.09 2.04 2.00 1.97 1.91 1.87 1.80 1.75 1.71 1.65 1.62 1.57 1.54 1.51 1.48 1.46 48: 4.04 3.19 2.80 2.56 2.41 2.30 2.21 2.14 2.08 2.03 1.99 1.96 1.90 1.86 1.79 1.74 1.70 1.64 1.61 1.56 1.53 1.50 1.47 1.45 50: 4.03 3.18 2.79 2.56 2.40 2.29 2.20 2.13 2.07 2.02 1.98 1.95 1.90 1.85 1.78 1.74 1.69 1.63 1.60 1.55 1.52 1.48 1.46 1.44 55: 4.02 3.17 2.78 2.54 2.38 2.27 2.18 2.11 2.05 2.00 1.97 1.93 1.88 1.83 1.76 1.72 1.67 1.61 1.58 1.52 1.50 1.46 1.43 1.41 60: 4.00 3.15 2.76 2.52 2.37 2.25 2.17 2.10 2.04 1.99 1.95 1.92 1.86 1.81 1.75 1.70 1.65 1.59 1.56 1.50 1.48 1.44 1.41 1.39 65: 3.99 3.14 2.75 2.51 2.36 2.24 2.15 2.08 2.02 1.98 1.94 1.90 1.85 1.80 1.73 1.68 1.63 1.57 1.54 1.49 1.46 1.42 1.39 1.37 70: 3.98 3.13 2.74 2.50 2.35 2.23 2.14 2.07 2.01 1.97 1.93 1.89 1.84 1.79 1.72 1.67 1.62 1.56 1.53 1.47 1.45 1.40 1.37 1.35 80: 3.96 3.11 2.72 2.48 2.33 2.21 2.12 2.05 1.99 1.95 1.91 1.88 1.82 1.77 1.70 1.65 1.60 1.54 1.51 1.45 1.42 1.38 1.35 1.32 100: 3.94 3.09 2.70 2.46 2.30 2.19 2.10 2.03 1.97 1.92 1.88 1.85 1.79 1.75 1.68 1.63 1.57 1.51 1.48 1.42 1.39 1.34 1.30 1.28 125: 3.92 3.07 2.8 2.44 2.29 2.17 2.08 2.01 1.95 1.90 1.86 1.83 1.77 1.72 1.65 1.60 1.55 1.49 1.45 1.39 1.36 1.31 1.27 1.25 150: 3.91 3.06 2.67 2.43 2.27 2.16 2.07 2.00 1.94 1.89 1.85 1.82 1.76 1.71 1.64 1.59 1.54 1.47 1.44 1.37 1.34 1.29 1.25 1.22 200: 3.89 3.04 2.65 2.41 2.26 2.14 2.05 1.98 1.92 1.87 1.83 1.80 1.74 1.69 1.62 1.57 1.52 1.45 1.42 1.35 1.32 1.26 1.22 1.19 400: 3.86 3.02 2.62 2.39 2.23 2.12 2.03 1.96 1.90 1.85 1.81 1.78 1.72 1.67 1.60 1.54 1.49 1.42 1.38 1.32 1.28 1.22 1.16 1.13 1000: 3.85 3.00 2.61 2.38 2.22 2.10 2.02 1.95 1.89 1.84 1.80 1.76 1.70 1.65 1.58 1.53 1.47 1.41 1.36 1.30 1.26 1.19 1.13 1.08 INF: 3.84 2.99 2.60 2.37 2.21 2.09 2.01 1.94 1.88 1.83 1.79 1.75 1.69 1.64 1.57 1.52 1.46 1.40 1.35 1.28 1.24 1.17 1.11 1.00 END_OF_TABLE @FtableRows = split(/\n/, $FtableData05); undef $FtableData05; @F_n1Counts = split(/ /, shift @FtableRows); while (@FtableRows) { $row = (shift @FtableRows) . (shift @FtableRows); ($n2, $cutoffs) = split(/:\s*/, $row); push(@F_n2Counts, $n2); @cutoffs = split(/\s+/, $cutoffs); for $n1 (@F_n1Counts) { $Ftable05{"$n1:$n2"} = shift(@cutoffs); } } $FtableData01 = <<'END_OF_TABLE'; 1 2 3 4 5 6 7 8 9 10 11 12 14 16 20 24 30 40 50 75 100 200 500 INF 1: 405200 499900 540300 562500 576400 585900 592800 598100 602200 605600 608200 610600 614200 616900 620800 623400 625800 628600 630200 632300 633400 635200 636100 636600 2: 9849 9900 9917 9925 9930 9933 9934 9936 9938 9940 9941 9942 9943 9944 9945 9946 9947 9948 9948 9949 9949 9949 9950 9950 3: 3412 3082 2946 2871 2824 2791 2767 2749 2734 2723 2713 2705 2692 2683 2669 2660 2650 2641 2635 2627 2623 2618 2614 2612 4: 2120 1800 1669 1598 1552 1521 1498 1480 1466 1454 1445 1437 1424 1415 1402 1393 1383 1374 1369 1361 1357 1352 1348 1346 5: 1626 1327 1206 1139 1097 1067 1045 1027 1015 1005 996 989 977 968 955 947 938 929 924 917 913 907 904 902 6: 1374 1092 978 915 875 847 826 810 798 787 779 772 760 752 739 731 723 714 709 702 699 694 690 688 7: 1225 955 845 785 746 719 700 684 671 662 654 647 635 627 615 607 598 590 585 578 575 570 567 565 8: 1126 865 759 701 663 637 619 603 591 582 574 567 556 548 536 528 520 511 506 500 496 491 488 486 9: 1056 802 699 642 606 580 562 547 535 526 518 511 500 492 480 473 464 456 451 445 441 436 433 431 10: 1004 756 655 599 564 539 521 506 495 485 478 471 460 452 441 433 425 417 412 405 401 396 393 391 11: 965 720 622 567 532 507 488 474 463 454 446 440 429 421 410 402 394 386 380 374 370 366 362 360 12: 933 693 595 541 506 482 465 450 439 430 422 416 405 398 386 378 370 361 356 349 346 341 338 336 13: 907 670 574 520 486 462 444 430 419 410 402 396 385 378 367 359 351 342 337 330 327 321 318 316 14: 886 651 556 503 469 446 428 414 403 394 386 380 370 362 351 343 334 326 321 314 311 306 302 300 15: 868 636 542 489 456 432 414 400 389 380 373 367 356 348 336 329 320 312 307 300 297 292 289 287 16: 853 623 529 477 444 420 403 389 378 369 361 355 345 337 325 318 310 301 296 289 286 280 277 275 17: 840 611 518 467 434 410 393 379 368 359 352 345 335 327 316 308 300 292 286 279 276 270 267 265 18: 828 601 509 458 425 401 385 371 360 351 344 337 327 319 307 300 291 283 278 271 268 262 259 257 19: 818 593 501 450 417 394 377 363 352 343 336 330 319 312 300 292 284 276 270 263 260 254 251 249 20: 810 585 494 443 410 387 371 356 345 337 330 323 313 305 294 286 277 269 263 256 253 247 244 242 21: 802 578 487 437 404 381 365 351 340 331 324 317 307 299 288 280 272 263 258 251 247 242 238 236 22: 794 572 482 431 399 376 359 345 335 326 318 312 302 294 283 275 267 258 253 246 242 237 233 231 23: 788 566 476 426 394 371 354 341 330 321 314 307 297 289 278 270 262 253 248 241 237 232 228 226 24: 782 561 472 422 390 367 350 336 325 317 309 303 293 285 274 266 258 249 244 236 233 227 223 221 25: 777 557 468 418 386 363 346 332 321 313 305 299 289 281 270 262 254 245 240 232 229 223 219 217 26: 772 553 464 414 382 359 342 329 317 309 302 296 286 277 266 258 250 241 236 228 225 219 215 213 27: 768 549 460 411 379 356 339 326 314 306 298 293 283 274 263 255 247 238 233 225 221 216 212 210 28: 764 545 457 407 376 353 336 323 311 303 295 290 280 271 260 252 244 235 230 222 218 213 209 206 29: 760 542 454 404 373 350 333 320 308 300 292 287 277 268 257 249 241 232 227 219 215 210 206 203 30: 756 539 451 402 370 347 330 317 306 298 290 284 274 266 255 247 238 229 224 216 213 207 203 201 32: 750 534 446 397 366 342 325 312 301 294 286 280 270 262 251 242 234 225 220 212 208 202 198 196 34: 744 529 442 393 361 338 321 308 297 289 282 276 266 258 247 238 230 221 215 208 204 198 194 191 36: 739 525 438 389 358 335 318 304 294 286 278 272 262 254 243 235 226 217 212 204 200 194 190 187 38: 735 521 434 386 354 332 315 302 291 282 275 269 259 251 240 232 222 214 208 200 197 190 186 184 40: 731 518 431 383 351 329 312 299 288 280 273 266 256 249 237 229 220 211 205 197 194 188 184 181 42: 727 515 429 380 349 326 310 296 286 277 270 264 254 246 235 226 217 208 202 194 191 185 180 178 44: 724 512 426 378 346 324 307 294 284 275 268 262 252 244 232 224 215 206 200 192 188 182 178 175 46: 721 510 424 376 344 322 305 292 282 273 266 260 250 242 230 222 213 204 198 190 186 180 176 172 48: 719 508 422 374 342 320 304 290 280 271 264 258 248 240 228 220 211 202 196 188 184 178 173 170 50: 717 506 420 372 341 318 302 288 278 270 262 256 246 239 226 218 210 200 194 186 182 176 171 168 55: 712 501 416 368 337 315 298 285 275 266 259 253 243 235 223 215 206 196 190 182 178 171 166 164 60: 708 498 413 365 334 312 295 282 273 263 256 250 240 232 220 212 203 193 187 179 174 168 163 160 65: 704 495 410 362 331 309 293 279 270 261 254 247 237 230 218 209 200 190 184 176 171 164 160 156 70: 701 492 408 360 329 307 291 277 267 259 251 245 235 228 215 207 198 188 182 174 169 162 156 153 80: 696 488 404 356 325 304 287 274 264 255 248 241 232 224 211 203 194 184 178 170 165 157 152 149 100: 690 482 398 351 320 299 282 269 259 251 243 236 226 219 206 198 189 179 173 164 159 151 146 143 125: 684 478 394 347 317 295 279 265 256 247 240 233 223 215 203 194 185 175 168 159 154 146 140 137 150: 681 475 391 344 314 292 276 262 253 244 237 230 220 212 200 191 183 172 166 156 151 143 137 133 200: 676 471 388 341 311 290 273 260 250 241 234 228 217 209 197 188 179 169 162 153 148 139 133 128 400: 670 466 383 336 306 285 269 255 246 237 229 223 212 204 192 184 174 164 157 147 142 132 124 119 1000: 666 462 380 334 304 282 266 253 243 234 226 220 209 201 189 181 171 161 154 144 138 128 119 111 INF: 664 460 378 332 302 280 264 251 241 232 224 218 207 199 187 179 169 159 152 141 136 125 115 100 END_OF_TABLE @FtableRows = split(/\n/, $FtableData01); undef $FtableData01; shift @FtableRows; while (@FtableRows) { $row = (shift @FtableRows) . (shift @FtableRows); ($n2, $cutoffs) = split(/:\s*/, $row); @cutoffs = split(/\s+/, $cutoffs); for $n1 (@F_n1Counts) { $Ftable01{"$n1:$n2"} = shift(@cutoffs) / 100; } } &Init; &LoadData; #For now assume always stdin &ReadData; &Compute; &Report; sub Init { $nIndependentVars = $nVars - 1; #number of independent variables for ($var = 0; $var < @varNames; $var++) { &InitVar($var); } } sub InitVar { local($var) = @_; $sumRawScores{"$var"} = 0; $sumSquaredRawScores{"$var"} = 0; } sub LoadData { # open(IN, ") { chop $line; if ($line =~ /:/ || $line =~ /^#/) { &ReadDirective($line); next; } next if $line =~ /^\s*$/; @vars = split(/\s+/, $line); if (!$nVars) { $nVars = @vars; if ($nVars == 2) { $varNames[0] = 'X'; $varNames[1] = 'Y'; } else { for ($i = 0; $i < $nVars - 1; $i++) { $subscript = $i + 1; $varNames[$i] = "X$subscript"; } $varNames[$i] = 'Y'; } &Init; } elsif (@vars != $nVars) { die "|$line| doesn't have $nVars variables"; } push(@lines, join(' ', @vars)); } # close(IN); } sub ReadData { local($i) = 0; for $line (@lines) { @vars = split(/ /, $line); $sampleSize++; for ($nthVar = 0; $nthVar < $nVars; $nthVar++) { &InsertScore($nthVar, $i, $vars[$nthVar]); } $i++; } } sub InsertScore { local($var, $index, $score) = @_; $rawScores{"$var:$index"} = $score; $sumRawScores{"$var"} += $score; local($scoreSquared) = $score ** 2; $squaredRawScores{"$var:$index"} = $scoreSquared; $sumSquaredRawScores{"$var"} += $scoreSquared; } sub Compute { for ($var = 0; $var < @varNames; $var++) { &SecondPass($var); } &Products; &BinaryRegressions; } sub SecondPass { local($var) = @_; $mean{$var} = $sumRawScores{$var} / $sampleSize; local($var) = @_; local($i, $dev, $squaredDev); local($mean) = $mean{$var}; $sumOfSquaredDeviationsFromMean{$var} = 0; for ($i = 0; $i < $sampleSize; $i++) { $dev = $rawScores{"$var:$i"} - $mean; $deviationsFromMean{"$var:$i"} = $dev; $squaredDev = $dev ** 2; $squaredDeviationsFromMean{"$var:$i"} = $squaredDev; $sumOfSquaredDeviationsFromMean{$var} += $squaredDev; } $sampleVariance{$var} = $sumOfSquaredDeviationsFromMean{$var} / ($sampleSize - 1); $standardDeviation{$var} = sqrt($sampleVariance{$var}); } sub Products { local($var1, $var2); for ($var1 = 0; $var1 < $nVars - 1; $var1++) { for ($var2 = $var1 + 1; $var2 < $nVars; $var2++) { &Product($var1, $var2); &Regress($var1, $var2); } } } sub Product { local($var1, $var2) = @_; local($i) = 0; $sumOfProducts{"$var1:$var2"} = 0; $sumOfCrossProductDeviationFromMean{"$var1:$var2"} = 0; for ($i = 0; $i < $sampleSize; $i++) { $prod = $rawScores{"$var1:$i"} * $rawScores{"$var2:$i"}; $product{"$var1:$var2:$i"} = $prod; $sumOfProducts{"$var1:$var2"} += $prod; $crossProductDeviationFromMean = $deviationsFromMean{"$var1:$i"} * $deviationsFromMean{"$var2:$i"}; $crossProductDeviationFromMean{"$var1:$var2:$i"} = $crossProductDeviationFromMean; $sumOfCrossProductDeviationFromMean{"$var1:$var2"} += $crossProductDeviationFromMean; } $sampleCovariance{"$var1:$var2"} = $sumOfCrossProductDeviationFromMean{"$var1:$var2"} / ($sampleSize - 1); } sub Regress { local($X, $Y) = @_; local($pair) = "$X:$Y"; local($i); $regressionSlope{$pair} = $sumOfCrossProductDeviationFromMean{$pair} / $sumOfSquaredDeviationsFromMean{$X}; $regressionIntercept{$pair} = $mean{$Y} - $regressionSlope{$pair} * $mean{$X}; $regressionSumOfSquares{$pair} = 0; $residualSumOfSquares{$pair} = 0; for ($i = 0; $i < $sampleSize; $i++) { $prediction{"$pair:$i"} = $regressionIntercept{$pair} + ($regressionSlope{$pair} * $rawScores{"$X:$i"}); #print("Y'($i) = ", $prediction{"$pair:$i"}, "\n"); } &RegressionFacts($pair, $Y); $correlation{$pair} = sqrt($propDepSumSquaresDueToRegression{$pair}); $propDepSumSquaresDueToResidual{$pair} = $residualSumOfSquares{$pair} / $sumOfSquaredDeviationsFromMean{$Y}; # p. 26: $dfRegression{$pair} = 1; $dfResidual{$pair} = $sampleSize - 2; $varianceOfEstimate{$pair} = #p. 27. = MSR (Mean Square Residual) $residualSumOfSquares{"$pair"} / $dfResidual{$pair}; $standardErrorOfEstimate{$pair} = sqrt($varianceOfEstimate{$pair}); #p. 28 for ($i = 0; $i < $sampleSize; $i++) { $standardizedResiduals{"$pair:$i"} = $residual{"$pair:$i"} / $standardErrorOfEstimate{$pair}; } $F{$pair} = ($regressionSumOfSquares{$pair} / $dfRegression{$pair}) / ($varianceOfEstimate{$pair}); $Fsignif{".05:$pair"} = &Fsignif(*Ftable05, $dfRegression{$pair}, $dfResidual{$pair}); $Fsignif{".01:$pair"} = &Fsignif(*Ftable01, $dfRegression{$pair}, $dfResidual{$pair}); $standardErrorOfRegrCoeff{$pair} = sqrt($varianceOfEstimate{$pair} / $sumOfSquaredDeviationsFromMean{$X}); &SlopeConfidenceInterval('.05', $pair, $Fsignif{".05:$pair"}); &SlopeConfidenceInterval('.01', $pair, $Fsignif{".01:$pair"}); $t{$pair} = $regressionSlope{$pair} / $standardErrorOfRegrCoeff{$pair}; } sub BinaryRegressions { local($ind1, $ind2); for ($ind1 = 0; $ind1 < $nVars - 2; $ind1++) { for ($ind2 = $ind1 + 1; $ind2 < $nVars - 1; $ind2++) { &BinaryRegression($ind1, $ind2, $nVars - 1); } } } sub BinaryRegression { local($ind1, $ind2, $dep) = @_; local($triplet) = "$ind1:$ind2:$dep"; $regressionSlope{"$ind1:$ind2:$dep"} = (($sumOfSquaredDeviationsFromMean{$ind2} * $sumOfCrossProductDeviationFromMean{"$ind1:$dep"}) - ($sumOfCrossProductDeviationFromMean{"$ind1:$ind2"} * $sumOfCrossProductDeviationFromMean{"$ind2:$dep"})) / (($sumOfSquaredDeviationsFromMean{$ind1} * $sumOfSquaredDeviationsFromMean{$ind2}) - ($sumOfCrossProductDeviationFromMean{"$ind1:$ind2"}**2)); $standRegressionSlope{"$ind1:$ind2:$dep"} = ($correlation{"$ind1:$dep"} - ($correlation{"$ind2:$dep"} * $correlation{"$ind1:$ind2"})) / (1 - ($correlation{"$ind1:$ind2"}**2)); $regressionSlope{"$ind2:$ind1:$dep"} = (($sumOfSquaredDeviationsFromMean{$ind1} * $sumOfCrossProductDeviationFromMean{"$ind2:$dep"}) - ($sumOfCrossProductDeviationFromMean{"$ind1:$ind2"} * $sumOfCrossProductDeviationFromMean{"$ind1:$dep"})) / (($sumOfSquaredDeviationsFromMean{$ind1} * $sumOfSquaredDeviationsFromMean{$ind2}) - ($sumOfCrossProductDeviationFromMean{"$ind1:$ind2"}**2)); $standRegressionSlope{"$ind2:$ind1:$dep"} = ($correlation{"$ind2:$dep"} - ($correlation{"$ind1:$dep"} * $correlation{"$ind1:$ind2"})) / (1 - ($correlation{"$ind1:$ind2"}**2)); $regressionIntercept{"$ind1:$ind2:$dep"} = $mean{$dep} - ($regressionSlope{"$ind1:$ind2:$dep"} * $mean{$ind1}) - ($regressionSlope{"$ind2:$ind1:$dep"} * $mean{$ind2}); for ($i = 0; $i < $sampleSize; $i++) { $prediction{"$triplet:$i"} = $regressionIntercept{"$ind1:$ind2:$dep"} + $regressionSlope{"$ind1:$ind2:$dep"} * $rawScores{"$ind1:$i"} + $regressionSlope{"$ind2:$ind1:$dep"} * $rawScores{"$ind2:$i"}; } &RegressionFacts($triplet, $dep); $dfRegression{$triplet} = 2; $dfResidual{$triplet} = $sampleSize - 3; $F{$triplet} = ($regressionSumOfSquares{$triplet} / $dfRegression{$triplet}) / ($residualSumOfSquares{$triplet} / $dfResidual{$triplet}); $Fsignif{".05:$triplet"} = &Fsignif(*Ftable05, $dfRegression{$triplet}, $dfResidual{$triplet}); $Fsignif{".01:$triplet"} = &Fsignif(*Ftable01, $dfRegression{$triplet}, $dfResidual{$triplet}); $varianceOfEstimate{$triplet} = #p. 27, 60. = MSR (Mean Square Residual) $residualSumOfSquares{$triplet} / $dfResidual{$triplet}; $uncorrelation = 1 - ($correlation{"$ind1:$ind2"} ** 2); $standardErrorOfRegrCoeff{"$ind1:$ind2:$dep"} = sqrt( $varianceOfEstimate{$triplet} / ($sumOfSquaredDeviationsFromMean{$ind1} * $uncorrelation)); $tOfRegrCoeff{"$ind1:$ind2:$dep"} = $regressionSlope{"$ind1:$ind2:$dep"} / $standardErrorOfRegrCoeff{"$ind1:$ind2:$dep"}; $standardErrorOfRegrCoeff{"$ind2:$ind1:$dep"} = sqrt( $varianceOfEstimate{$triplet} / ($sumOfSquaredDeviationsFromMean{$ind2} * $uncorrelation)); $tOfRegrCoeff{"$ind2:$ind1:$dep"} = $regressionSlope{"$ind2:$ind1:$dep"} / $standardErrorOfRegrCoeff{"$ind2:$ind1:$dep"}; &SlopeConfidenceInterval( '.05', "$ind1:$ind2:$dep", &Fsignif(*Ftable05, 1, $dfResidual{$triplet})); &SlopeConfidenceInterval( '.01', "$ind1:$ind2:$dep", &Fsignif(*Ftable01, 1, $dfResidual{$triplet})); &SlopeConfidenceInterval( '.05', "$ind2:$ind1:$dep", &Fsignif(*Ftable05, 1, $dfResidual{$triplet})); &SlopeConfidenceInterval( '.01', "$ind2:$ind1:$dep", &Fsignif(*Ftable01, 1, $dfResidual{$triplet})); $incrDueTo{"$ind1:$ind2:$dep"} = $propDepSumSquaresDueToRegression{"$ind1:$ind2:$dep"} - $propDepSumSquaresDueToRegression{"$ind2:$dep"}; $Fincr{"$ind1:$ind2:$dep"} = $incrDueTo{"$ind1:$ind2:$dep"} / ((1 - $propDepSumSquaresDueToRegression{"$ind1:$ind2:$dep"}) / $dfResidual{$triplet}); $FincrSignif{".05:$ind1:$ind2:$dep"} = &Fsignif(*Ftable05, 1, $dfResidual{$triplet}); $FincrSignif{".01:$ind1:$ind2:$dep"} = &Fsignif(*Ftable01, 1, $dfResidual{$triplet}); $incrDueTo{"$ind2:$ind1:$dep"} = $propDepSumSquaresDueToRegression{"$ind1:$ind2:$dep"} - $propDepSumSquaresDueToRegression{"$ind1:$dep"}; $Fincr{"$ind2:$ind1:$dep"} = $incrDueTo{"$ind2:$ind1:$dep"} / ((1 - $propDepSumSquaresDueToRegression{"$ind1:$ind2:$dep"}) / $dfResidual{$triplet}); $FincrSignif{".05:$ind2:$ind1:$dep"} = &Fsignif(*Ftable05, 1, $dfResidual{$triplet}); $FincrSignif{".01:$ind2:$ind1:$dep"} = &Fsignif(*Ftable01, 1, $dfResidual{$triplet}); } sub RegressionFacts { local($indices, $Y) = @_; local($i); $regressionSumOfSquares{"$indices"} = 0; $residualSumOfSquares{"$indices"} = 0; for ($i = 0; $i < $sampleSize; $i++) { $devDueToRegression{"$indices:$i"} = $prediction{"$indices:$i"} - $mean{$Y}; #print("devDueToRegression($i) = ", # $devDueToRegression{"$indices:$i"}, "\n"); $squaredDevDueToRegression{"$indices:$i"} = $devDueToRegression{"$indices:$i"} ** 2; #print("squaredDevDueToRegression($i) = ", # $squaredDevDueToRegression{"$indices:$i"}, "\n"); $regressionSumOfSquares{"$indices"} += $squaredDevDueToRegression{"$indices:$i"}; $residual{"$indices:$i"} = $rawScores{"$Y:$i"} - $prediction{"$indices:$i"}; #print("residual($i) = ", $residual{"$indices:$i"}, "\n"); $squaredResidual{"$indices:$i"} = $residual{"$indices:$i"} ** 2; #print("squared residual($i) = ", $squaredResidual{"$indices:$i"}, "\n"); $residualSumOfSquares{"$indices"} += $squaredResidual{"$indices:$i"}; } #The following is the same as the Pearson product moment # coefficient of correlation: $propDepSumSquaresDueToRegression{$indices} = $regressionSumOfSquares{$indices} / $sumOfSquaredDeviationsFromMean{$Y}; } sub SlopeConfidenceInterval { local($p, $index, $F) = @_; local($confidenceRange) = sqrt($F) * $standardErrorOfRegrCoeff{"$index"}; $slopeConfidInterval{"$p:$index"} = ($regressionSlope{$index} - $confidenceRange) . ' .. ' . ($regressionSlope{$index} + $confidenceRange); } sub Fsignif { local(*Ftable, $n1, $n2) = @_; local($lowerN1, $higherN1) = split(/ /, &GetNearestN($n1, *F_n1Counts)); local($lowerN2, $higherN2) = split(/ /, &GetNearestN($n2, *F_n2Counts)); &biggest( $Ftable{"$lowerN1:$lowerN2"}, $Ftable{"$lowerN1:$higherN2"}, $Ftable{"$higherN1:$lowerN2"}, $Ftable{"$higherN1:$higherN2"}); } sub biggest { local($biggest) = 0; for (@_) {$biggest = $_ if $_ > $biggest;} $biggest; } sub GetNearestN { local($n, *list) = @_; local($i); for ($i = 0; $i < @list; $i++) { if ($list[$i] eq 'INF' or $list[$i] > $n) { return "$list[$i - 1] $list[$i]"; } if ($list[$i] == $n) {return "$n $n";} } } sub Report { print("Sample size = $sampleSize\n"); for ($var = 0; $var < @varNames; $var++) { &PerVarStats($var); } &ReportPairs; &ReportBinaryRegressions; } sub ReportPairs { local($var1, $var2); for ($var1 = 0; $var1 < $nVars - 1; $var1++) { for ($var2 = $var1 + 1; $var2 < $nVars; $var2++) { &ReportPair($var1, $var2); } } } sub ReportPair { local($var1, $var2) = @_; print "For `$varNames[$var1]' -> `$varNames[$var2]':\n"; local($pair) = "$var1:$var2"; print(" Sum of products = ", $sumOfProducts{$pair}, "\n"); print(" Sum of cross products deviations from the means = ", $sumOfCrossProductDeviationFromMean{$pair}, "\n"); print(" Correlation = $correlation{$pair}\n"); if ($var2 == $nVars - 1) { print(" Sample covariance = ", $sampleCovariance{$pair}, "\n"); print(" Regression formula: Y = $regressionIntercept{$pair} + $regressionSlope{$pair}X\n"); print(" Regression sum of squares = $regressionSumOfSquares{$pair}\n"); print(" Residual sum of squares = $residualSumOfSquares{$pair}\n"); print(" Proportion of sum of squares of dependent due to...\n"); print(" regression = $propDepSumSquaresDueToRegression{$pair}\n"); print(" residual = $propDepSumSquaresDueToResidual{$pair}\n"); print(" Degrees of freedom of the regression = $dfRegression{$pair}\n"); print(" Degrees of freedom of the residual = $dfResidual{$pair}\n"); print(" Variance of estimate (Mean Square Residual) = ", $varianceOfEstimate{$pair}, "\n"); print(" Standard error of estimate (standard deviation of residuals) = ", $standardErrorOfEstimate{$pair}, "\n"); &PrintF($pair); print(" Standard error of regression coefficient = ", $standardErrorOfRegrCoeff{$pair}, "\n"); print(" Confidence interval of regression slope ...\n"); print(" at .05 = ", $slopeConfidInterval{".05:$pair"}, "\n"); print(" at .01 = ", $slopeConfidInterval{".01:$pair"}, "\n"); print(" t ratio = $t{$pair} ($dfResidual{$pair} df)\n"); #To do: send plots off to separate files. Study gnuplot format. #print(" Predicted values vs. Standardized residuals:\n"); #for ($i = 0; $i < $sampleSize; $i++) { # print(" ", $prediction{"$pair:$i"}, ' ', # $standardizedResiduals{"$pair:$i"}, "\n"); #} #To do: plot to scatter plot X-Y, and draw regression line. #Consider doing log xform for free. } } sub PrintF { local($i) = @_; &PrintThisF($F{$i}, $Fsignif{".01:$i"}, $Fsignif{".05:$i"}); } sub PrintThisF { local($F, $signif01, $signif05) = @_; print(" F ratio = $F"); if ($F > $signif01) {print " (p < .01)";} elsif ($F > $signif05) {print " (p < .05)";} else {print(" (n.s.; cutoff at .05 is ", $signif05, ")");} print "\n"; } sub ReportBinaryRegressions { local($ind1, $ind2); for ($ind1 = 0; $ind1 < $nVars - 2; $ind1++) { for ($ind2 = $ind1 + 1; $ind2 < $nVars - 1; $ind2++) { &ReportBinaryRegression($ind1, $ind2, $nVars - 1); } } } sub ReportBinaryRegression { local($ind1, $ind2, $dep) = @_; local($triplet) = "$ind1:$ind2:$dep"; print "($varNames[$ind1], $varNames[$ind2]) -> $varNames[$dep]\n"; print " Regression formula: $varNames[$dep] = ", $regressionIntercept{"$ind1:$ind2:$dep"}, " + \n ", $regressionSlope{"$ind1:$ind2:$dep"}, " $varNames[$ind1] + \n ", $regressionSlope{"$ind2:$ind1:$dep"}, " $varNames[$ind2]\n"; print " Standardized regression coefficient for ...\n"; print " $varNames[$ind1] = ", $standRegressionSlope{"$ind1:$ind2:$dep"}, "\n"; print " $varNames[$ind2] = ", $standRegressionSlope{"$ind2:$ind1:$dep"}, "\n"; print " Regression sum of squares = ", $regressionSumOfSquares{"$ind1:$ind2:$dep"}, "\n"; print " Residual sum of squares = ", $residualSumOfSquares{"$ind1:$ind2:$dep"}, "\n"; print " Squared multiple correlation = ", $propDepSumSquaresDueToRegression{"$ind1:$ind2:$dep"}, "\n"; print " "; &PrintF("$ind1:$ind2:$dep"); print " Variance of estimate = ", $varianceOfEstimate{$triplet}, "\n"; print " Standard error of coefficient for ...\n"; print " $varNames[$ind1] = ", $standardErrorOfRegrCoeff{"$ind1:$ind2:$dep"}, "\n"; print " $varNames[$ind2] = ", $standardErrorOfRegrCoeff{"$ind2:$ind1:$dep"}, "\n";; print " t score of coefficient for ... ($dfResidual{$triplet} df)\n"; print " $varNames[$ind1] = ", $tOfRegrCoeff{"$ind1:$ind2:$dep"}, "\n"; print " $varNames[$ind2] = ", $tOfRegrCoeff{"$ind2:$ind1:$dep"}, "\n"; print(" Confidence interval of regression slope for ...\n"); print(" $varNames[$ind1]:\n"); print(" at .05 = ", $slopeConfidInterval{".05:$ind1:$ind2:$dep"}, "\n"); print(" at .01 = ", $slopeConfidInterval{".01:$ind1:$ind2:$dep"}, "\n"); print(" $varNames[$ind2]:\n"); print(" at .05 = ", $slopeConfidInterval{".05:$ind2:$ind1:$dep"}, "\n"); print(" at .01 = ", $slopeConfidInterval{".01:$ind2:$ind1:$dep"}, "\n"); print(" Increment in proportion of variance accounted for by...\n"); print(" $varNames[$ind1] (over $varNames[$ind2]) = ", $incrDueTo{"$ind1:$ind2:$dep"}, "\n"); print(" "); &PrintThisF($Fincr{"$ind1:$ind2:$dep"}, $FincrSignif{".01:$ind1:$ind2:$dep"}, $FincrSignif{".05:$ind1:$ind2:$dep"}); print(" $varNames[$ind2] (over $varNames[$ind1]) = ", $incrDueTo{"$ind2:$ind1:$dep"}, "\n"); print(" "); &PrintThisF($Fincr{"$ind2:$ind1:$dep"}, $FincrSignif{".01:$ind2:$ind1:$dep"}, $FincrSignif{".05:$ind2:$ind1:$dep"}); } sub PerVarStats { local($var) = @_; print("$varNames[$var]:\n"); print(" Sum of raw scores = ", $sumRawScores{$var}, "\n"); print(" Mean = ", $mean{$var}, "\n"); print(" Sum of squared raw scores = ", $sumSquaredRawScores{$var}, "\n"); print(" Sum of squared deviations from the mean = ", $sumOfSquaredDeviationsFromMean{$var}, "\n"); print(" Sample variance = ", $sampleVariance{$var}, "\n"); print(" Standard deviation = ", $standardDeviation{$var}, "\n"); } sub ReadDirective { local($line) = @_; $line =~ s/^#//; ($tag, $value) = split(/\s*:\s*/, $line); if ($tag =~ /varnames/) { (@varNames) = split(/\s*,\s*/, $value); if ($nVars) { if (@varNames != $nVars) { die "varnames directive changes the number of variables"; } } else { $nVars = @varNames; &Init; } } } __END__