olympics.run <-read.delim("~/data/olympics.run.txt")
run<-olympics.run[,-1]
row.names(run)<-olympics.run[,1]
run
## X100m X200m X400m X800m X1500m
## 1972 10.14 20.00 44.66 105.90 216.30
## 1976 10.06 20.23 44.26 103.50 219.17
## 1980 10.25 20.19 44.60 105.40 218.40
## 1984 9.99 19.80 44.27 103.00 212.53
## 1988 9.92 19.75 43.87 103.45 215.96
## 1992 9.96 20.01 43.50 103.66 220.12
## 1996 9.84 19.32 43.49 102.58 215.78
## 2000 9.87 20.09 43.84 105.08 212.07
## 2004 9.85 19.79 44.00 104.45 214.18
additive.fit<-medpolish(run)
## 1: 31
## 2: 28.7
## Final: 28.7
additive.fit
##
## Median Polish Results (Dataset: "run")
##
## Overall: 44.02
##
## Row Effects:
## 1972 1976 1980 1984 1988 1992 1996 2000 2004
## 0.27 0.24 0.58 -0.05 -0.15 0.09 -0.53 0.00 -0.02
##
## Column Effects:
## X100m X200m X400m X800m X1500m
## -34.15 -24.17 0.00 59.58 172.09
##
## Residuals:
## X100m X200m X400m X800m X1500m
## 1972 0.00 -0.12 0.37 2.03 -0.08
## 1976 -0.05 0.14 0.00 -0.34 2.82
## 1980 -0.20 -0.24 0.00 1.22 1.71
## 1984 0.17 0.00 0.30 -0.55 -3.53
## 1988 0.20 0.05 0.00 0.00 0.00
## 1992 0.00 0.07 -0.61 -0.03 3.92
## 1996 0.50 0.00 0.00 -0.49 0.20
## 2000 0.00 0.24 -0.18 1.48 -4.04
## 2004 0.00 -0.04 0.00 0.87 -1.91
Row.Part <- with(additive.fit, row + overall)
Col.Part <- additive.fit$col
plot2way(Row.Part, Col.Part,
dimnames(run)[[1]], dimnames(run)[[2]])
additive.fit$residuals
## X100m X200m X400m X800m X1500m
## 1972 0.00 -0.12 0.37 2.03 -0.08
## 1976 -0.05 0.14 0.00 -0.34 2.82
## 1980 -0.20 -0.24 0.00 1.22 1.71
## 1984 0.17 0.00 0.30 -0.55 -3.53
## 1988 0.20 0.05 0.00 0.00 0.00
## 1992 0.00 0.07 -0.61 -0.03 3.92
## 1996 0.50 0.00 0.00 -0.49 0.20
## 2000 0.00 0.24 -0.18 1.48 -4.04
## 2004 0.00 -0.04 0.00 0.87 -1.91
aplpack::stem.leaf(c(additive.fit$residual),
unit=1, m=5)
## 1 | 2: represents 12
## leaf unit: 1
## n: 45
## 1 f | 4
## 2 t | 3
## 15 -0* | 1000000000000
## (27) 0* | 000000000000000000000000111
## 3 t | 223
From the Additive Fit output, we can see:
• The average run time is 44.02 s, which is the common value.
• Generally, X100 costs 44.02 -34.15 = 9.87 seconds on average between 1972-2004.(Column Effects)
• 1976 tends to be 0.27-0.24 = 0.03 seconds faster than 1972 on average between running X100, X200, X400, X800 and X1500.(Row Effects)
• What we see is that most of the residuals are small – between -4 and 3, and there are only three large residuals that are set apart from the rest. These are -4.04 (X1500 in 2000), 3.92 (X1500 in 1992) and -3.53 (X1500 in 1984).
log.run <- log10(run)
log.run
## X100m X200m X400m X800m X1500m
## 1972 1.0060380 1.301030 1.649919 2.024896 2.335057
## 1976 1.0025980 1.305996 1.646011 2.014940 2.340781
## 1980 1.0107239 1.305136 1.649335 2.022841 2.339253
## 1984 0.9995655 1.296665 1.646110 2.012837 2.327420
## 1988 0.9965117 1.295567 1.642168 2.014730 2.334373
## 1992 0.9982593 1.301247 1.638489 2.015611 2.342660
## 1996 0.9929951 1.286007 1.638389 2.011063 2.334011
## 2000 0.9943172 1.302980 1.641871 2.021520 2.326479
## 2004 0.9934362 1.296446 1.643453 2.018908 2.330779
Additive.fit <- medpolish(log.run)
## 1: 0.1357058
## 2: 0.114946
## 3: 0.1132811
## Final: 0.1132811
Additive.fit
##
## Median Polish Results (Dataset: "log.run")
##
## Overall: 1.643266
##
## Row Effects:
## 1972 1976 1980 1984 1988
## 0.0066529940 0.0047265652 0.0069015991 0.0000000000 -0.0010980903
## 1992 1996 2000 2004
## 0.0003879230 -0.0048763170 -0.0013951792 -0.0002193961
##
## Column Effects:
## X100m X200m X400m X800m X1500m
## -0.6453943 -0.3466005 0.0000000 0.3726733 0.6890853
##
## Residuals:
## X100m X200m X400m X800m X1500m
## 1972 0.00151355 -0.0022882 0.00000000 0.00230395 -0.0039475
## 1976 0.00000000 0.0046041 -0.00198088 -0.00572523 0.0037035
## 1980 0.00595085 0.0015695 -0.00083247 0.00000000 0.0000000
## 1984 0.00169407 0.0000000 0.00284380 -0.00310179 -0.0049308
## 1988 -0.00026165 0.0000000 0.00000000 -0.00011043 0.0031204
## 1992 0.00000000 0.0041940 -0.00516439 -0.00071573 0.0099205
## 1996 0.00000000 -0.0057818 0.00000000 0.00000000 0.0065365
## 2000 -0.00215908 0.0077099 0.00000000 0.00697623 -0.0044766
## 2004 -0.00421579 0.0000000 0.00040635 0.00318883 -0.0013527
COMMON <- 10 ^ Additive.fit$overall
ROW <- 10 ^ Additive.fit$row
COL <- 10 ^ Additive.fit$col
RESIDUAL <- 10 ^ Additive.fit$residual
COMMON
## [1] 43.98106
ROW
## 1972 1976 1980 1984 1988 1992 1996 2000
## 1.0154370 1.0109428 1.0160185 1.0000000 0.9974747 1.0008936 0.9888347 0.9967926
## 2004
## 0.9994949
COL
## X100m X200m X400m X800m X1500m
## 0.2262589 0.4501938 1.0000000 2.3587032 4.8874836
RESIDUAL
## X100m X200m X400m X800m X1500m
## 1972 1.0034911 0.9947451 1.0000000 1.0053191 0.9909517
## 1976 1.0000000 1.0106578 0.9954492 0.9869037 1.0085641
## 1980 1.0137966 1.0036205 0.9980850 1.0000000 1.0000000
## 1984 1.0039084 1.0000000 1.0065696 0.9928833 0.9887106
## 1988 0.9993977 1.0000000 1.0000000 0.9997458 1.0072108
## 1992 1.0000000 1.0097038 0.9881790 0.9983533 1.0231058
## 1996 1.0000000 0.9867753 1.0000000 1.0000000 1.0151646
## 2000 0.9950409 1.0179113 1.0000000 1.0161931 0.9897451
## 2004 0.9903398 1.0000000 1.0009361 1.0073696 0.9968901
Year <- seq(1972, 2004, by=4)
Log.Distance <- log10(c(100, 200, 400, 800, 1500))
par(mfrow=c(1, 1))
plot(Year, Additive.fit$row,
ylab="Row Effect", pch=19,
main="Plot of Year Effects")
plot(Log.Distance, Additive.fit$col,
ylab="Col Effect", pch=19,
xlab="Log Distance",
main="Plot of Distance Effects")
From the Multiplicative Fit output(note all the values are the log time):
• 1.643266 is the common value
• 0.0066529940 is the additive effect due to the year 1972
• -0.6453943 is the additive effect due to the 100m race.
• 0.00151355 is the residual (what’s left of the data after taking out the additive fit).
To get a fit for the original time data, we can take the common, row effects, column effects, and residuals each to the 10th power.
From the graphs, we see
• There was a clear decrease trend in log time from 1972 to 1996 and increase after 1996. However, there are some temporary increase happened in 1980 and 1992.
• As we know, the log times increase linearly as a function of log distance. But this graph doesn’t show the fatigue effect – one could discover this by means of a residual plot from a linear fit to this graph.
In our dataset, after comparing between these fits, it should be clear that an additive model is not the best fit. If the table were additive, then one would expect the difference between the times in the first two columns to be a constant across rows. Likewise, the difference between any two columns should be a constant for each row. Since the 200m swim is twice the distance than 100m, you would expect the winning time to be roughly twice the winning time for the 100 meter swim. So the relationship between the columns of the table is multiplicative, rather than additive.
## X500m X1000m X1500m X5000m X10000m
## 1976 39.17 79.32 119.38 444.48 890.59
## 1980 38.03 75.18 115.44 422.29 868.13
## 1984 38.19 75.80 118.36 432.28 879.90
## 1988 36.45 73.03 112.06 404.63 828.20
## 1992 37.14 74.85 114.81 419.97 852.12
## 1994 36.33 72.43 111.29 394.96 810.55
## 1998 35.59 70.64 107.87 382.20 795.33
## 2002 34.42 67.18 103.95 374.66 778.92
## 2006 34.82 68.89 105.97 374.68 781.57
## X500m X1000m X1500m X5000m X10000m
## 1976 1.592954 1.899383 2.076932 2.647852 2.949678
## 1980 1.580126 1.876102 2.062356 2.625611 2.938585
## 1984 1.581950 1.879669 2.073205 2.635765 2.944433
## 1988 1.561698 1.863501 2.049451 2.607058 2.918135
## 1992 1.569842 1.874192 2.059980 2.623218 2.930501
## 1994 1.560265 1.859918 2.046456 2.596553 2.908780
## 1998 1.551328 1.849051 2.032901 2.582291 2.900547
## 2002 1.536811 1.827240 2.016824 2.573637 2.891493
## 2006 1.541829 1.838156 2.025183 2.573661 2.892968
## 1: 0.135343
## 2: 0.1275123
## Final: 0.1272076
##
## Median Polish Results (Dataset: "log.skating")
##
## Overall: 2.049451
##
## Row Effects:
## 1976 1980 1984 1988 1992 1994
## 0.029706154 0.014653710 0.023754338 0.000000000 0.010529099 -0.005207217
## 1998 2002 2006
## -0.016549939 -0.032626124 -0.025345116
##
## Column Effects:
## X500m X1000m X1500m X5000m X10000m
## -0.4839780 -0.1859493 0.0000000 0.5576075 0.8705210
##
## Residuals:
## X500m X1000m X1500m X5000m X10000m
## 1976 -0.0022252 0.00617525 -0.0022252 0.01108799 0.00000000
## 1980 0.0000000 -0.00205269 -0.0017480 0.00389901 0.00395939
## 1984 -0.0072773 -0.00758643 0.0000000 0.00495273 0.00070732
## 1988 -0.0037751 0.00000000 0.0000000 0.00000000 -0.00183643
## 1992 -0.0061598 0.00016141 0.0000000 0.00563109 0.00000000
## 1994 0.0000000 0.00162440 0.0022127 -0.00529775 -0.00598463
## 1998 0.0024053 0.00209933 0.0000000 -0.00821746 -0.00287436
## 2002 0.0039644 -0.00363518 0.0000000 -0.00079463 0.00414732
## 2006 0.0017013 0.00000000 0.0010774 -0.00805245 -0.00165866
## [1] 112.06
## 1976 1980 1984 1988 1992 1994 1998 2002
## 1.0707946 1.0343171 1.0562199 1.0000000 1.0245404 0.9880815 0.9626093 0.9276281
## 2006
## 0.9433110
## X500m X1000m X1500m X5000m X10000m
## 0.3281119 0.6517044 1.0000000 3.6108335 7.4220016
## X500m X1000m X1500m X5000m X10000m
## 1976 0.9948894 1.0143206 0.9948894 1.0258598 1.0000000
## 1980 1.0000000 0.9952847 0.9959831 1.0090182 1.0091585
## 1984 0.9833830 0.9826833 1.0000000 1.0114693 1.0016300
## 1988 0.9913452 1.0000000 1.0000000 1.0000000 0.9957804
## 1992 0.9859166 1.0003717 1.0000000 1.0130505 1.0000000
## 1994 1.0000000 1.0037473 1.0051080 0.9878756 0.9863144
## 1998 1.0055538 1.0048456 1.0000000 0.9812565 0.9934034
## 2002 1.0091701 0.9916646 1.0000000 0.9981720 1.0095953
## 2006 1.0039250 1.0000000 1.0024840 0.9816294 0.9961881
From the Multiplicative Fit output(note all the values are the log time):
• 2.049451 is the common value
• 0.029706154 is the additive effect due to the year 1976
• −0.4839780 is the additive effect due to the 500m speed skating.
• -0.0022252 is the residual (what’s left of the data after taking out the additive fit).
From the graphs, we see
• There was a clear decrease trend in log time from 1976 to 2002, however, there are some temporary increase happened in 1984,1992 and 2005.
• As we know, the log times increase linearly as a function of log distance. But this graph doesn’t show the fatigue effect – one could discover this by means of a residual plot from a linear fit to this graph.
church.2way <- read.delim("~/data/church.2way.txt")
church <- church.2way[, -1]
row.names(church) <- church.2way[, 1]
church
## y1993 y1994 y1995 y1996
## Jan 336 335 394 381
## Feb 295 369 358 385
## Mar 308 395 364 418
## Apr 321 420 388 413
## May 323 399 379 393
## June 305 355 343 375
## July 265 310 309 338
## Aug 301 301 344 335
## Sept 288 338 399 429
## Oct 345 349 402 399
## Nov 405 376 398 424
## Dec 378 386 401 381
additive.fit <- medpolish(church)
## 1: 786
## 2: 739.25
## Final: 735
additive.fit
##
## Median Polish Results (Dataset: "church")
##
## Overall: 370.5156
##
## Row Effects:
## Jan Feb Mar Apr May June July
## -2.203125 -12.484375 7.015625 16.515625 2.015625 -20.703125 -59.734375
## Aug Sept Oct Nov Dec
## -44.484375 -2.015625 11.296875 27.015625 22.984375
##
## Column Effects:
## y1993 y1994 y1995 y1996
## -45.296875 -4.140625 3.718750 25.578125
##
## Residuals:
## y1993 y1994 y1995 y1996
## Jan 12.98438 -29.1719 21.9688 -12.89062
## Feb -17.73438 15.1094 -3.7500 1.39062
## Mar -24.23438 21.6094 -17.2500 14.89062
## Apr -20.73438 37.1094 -2.7500 0.39062
## May -4.23438 30.6094 2.7500 -5.10938
## June 0.48438 9.3281 -10.5312 -0.39062
## July -0.48438 3.3594 -5.5000 1.64062
## Aug 20.26562 -20.8906 14.2500 -16.60938
## Sept -35.20312 -26.3594 26.7812 34.92188
## Oct 8.48438 -28.6719 16.4688 -8.39062
## Nov 52.76562 -17.3906 -3.2500 0.89062
## Dec 29.79688 -3.3594 3.7812 -38.07812
church <- church[order(additive.fit$row), ]
additive.fit <- medpolish(church)
## 1: 786
## 2: 739.25
## Final: 735
additive.fit
##
## Median Polish Results (Dataset: "church")
##
## Overall: 370.5156
##
## Row Effects:
## July Aug June Feb Jan Sept May
## -59.734375 -44.484375 -20.703125 -12.484375 -2.203125 -2.015625 2.015625
## Mar Oct Apr Dec Nov
## 7.015625 11.296875 16.515625 22.984375 27.015625
##
## Column Effects:
## y1993 y1994 y1995 y1996
## -45.296875 -4.140625 3.718750 25.578125
##
## Residuals:
## y1993 y1994 y1995 y1996
## July -0.48438 3.3594 -5.5000 1.64062
## Aug 20.26562 -20.8906 14.2500 -16.60938
## June 0.48438 9.3281 -10.5312 -0.39062
## Feb -17.73438 15.1094 -3.7500 1.39062
## Jan 12.98438 -29.1719 21.9688 -12.89062
## Sept -35.20312 -26.3594 26.7812 34.92188
## May -4.23438 30.6094 2.7500 -5.10938
## Mar -24.23438 21.6094 -17.2500 14.89062
## Oct 8.48438 -28.6719 16.4688 -8.39062
## Apr -20.73438 37.1094 -2.7500 0.39062
## Dec 29.79688 -3.3594 3.7812 -38.07812
## Nov 52.76562 -17.3906 -3.2500 0.89062
cv <- with(additive.fit,outer(row, col, "*") / overall)
plot(as.vector(cv), as.vector(additive.fit$residuals),
xlab = "COMPARISON VALUES", ylab = "RESIDUALS")
plot(as.vector(cv),as.vector(additive.fit$residuals) -1.46 * as.vector(cv), xlab = "COMPARISON VALUES", ylab = "RESIDUAL FROM FIT", main="Checking Suitability of Line Fit")
abline(h=0, col="red")
First we order the data with the lowest attendance of worship in month (by row effects). From the Fit output, we see some large residuals values which is greater than 25. These large residuals are in the same order of magnitude as the effects, so this additive model doesn’t appear to fit very well. In addition, there is some decreasing trend in residual plot with comparison values. Then we find the coefficient K of comparison value 1.46 and check the if we remove the trend in residual plot. The model would be: FIT = COMMON +ROW EFF +COLEFF +1.46CV. Although the last residual plot still shows some pattern which means the extended fit doesn’t help fix, overall the residual were unaffected now.