library(LearnEDAfunctions)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2

Question 1

transport <- data.frame(
  Age.Group = c("18-29", "30-49", "50-64", "65+"),
  Car = c(85, 110, 90, 60),
  Bus = c(45, 35, 50, 80),
  Train = c(30, 55, 75, 40),
  Bike = c(60, 25, 10, 5))

transport.table <- as.matrix(transport[, -1])
rownames(transport.table) <- transport$Age.Group

transport.table
##       Car Bus Train Bike
## 18-29  85  45    30   60
## 30-49 110  35    55   25
## 50-64  90  50    75   10
## 65+    60  80    40    5
additive.fit <- medpolish(transport.table)
## 1: 245
## 2: 195
## Final: 195
additive.fit
## 
## Median Polish Results (Dataset: "transport.table")
## 
## Overall: 50.625
## 
## Row Effects:
##    18-29    30-49    50-64      65+ 
##  -0.9375   0.9375   4.0625 -16.5625 
## 
## Column Effects:
##      Car      Bus    Train     Bike 
##  35.3125  -4.6875   4.6875 -27.8125 
## 
## Residuals:
##          Car     Bus   Train    Bike
## 18-29  0.000   0.000 -24.375  38.125
## 30-49 23.125 -11.875  -1.250   1.250
## 50-64  0.000   0.000  15.625 -16.875
## 65+   -9.375  50.625   1.250  -1.250
plot(additive.fit)

log.transport <- log10(transport.table)
log.transport
##            Car      Bus    Train     Bike
## 18-29 1.929419 1.653213 1.477121 1.778151
## 30-49 2.041393 1.544068 1.740363 1.397940
## 50-64 1.954243 1.698970 1.875061 1.000000
## 65+   1.778151 1.903090 1.602060 0.698970
additive.fit <- medpolish(log.transport)
## 1: 2.803494
## 2: 2.379956
## Final: 2.379956
additive.fit
## 
## Median Polish Results (Dataset: "log.transport")
## 
## Overall: 1.7017
## 
## Row Effects:
##       18-29       30-49       50-64         65+ 
## -0.01764527  0.05630762  0.01764527 -0.14446445 
## 
## Column Effects:
##         Car         Bus       Train        Bike 
##  0.24013038 -0.02560908  0.01358942 -0.53970678 
## 
## Residuals:
##              Car        Bus     Train     Bike
## 18-29  0.0052335 -0.0052335 -0.220523  0.63380
## 30-49  0.0432544 -0.1883308 -0.031235  0.17964
## 50-64 -0.0052335  0.0052335  0.142126 -0.17964
## 65+   -0.0192150  0.3714632  0.031235 -0.31856
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual

COMMON; ROW; COL
## [1] 50.31533
##     18-29     30-49     50-64       65+ 
## 0.9601846 1.1384334 1.0414664 0.7170271
##       Car       Bus     Train      Bike 
## 1.7383226 0.9427378 1.0317855 0.2885979

A multiplicative fit is more suitable for my data. This model provideds a more accurate representation of how age effects transportation.

Question 2

data("olympics.speed.skating")
head(olympics.speed.skating)
##   YEAR X500m X1000m X1500m X5000m X10000m
## 1 1976 39.17  79.32 119.38 444.48  890.59
## 2 1980 38.03  75.18 115.44 422.29  868.13
## 3 1984 38.19  75.80 118.36 432.28  879.90
## 4 1988 36.45  73.03 112.06 404.63  828.20
## 5 1992 37.14  74.85 114.81 419.97  852.12
## 6 1994 36.33  72.43 111.29 394.96  810.55
times <- olympics.speed.skating[, -1]   
rownames(times) <- olympics.speed.skating[, 1]
times
##      X500m X1000m X1500m X5000m X10000m
## 1976 39.17  79.32 119.38 444.48  890.59
## 1980 38.03  75.18 115.44 422.29  868.13
## 1984 38.19  75.80 118.36 432.28  879.90
## 1988 36.45  73.03 112.06 404.63  828.20
## 1992 37.14  74.85 114.81 419.97  852.12
## 1994 36.33  72.43 111.29 394.96  810.55
## 1998 35.59  70.64 107.87 382.20  795.33
## 2002 34.42  67.18 103.95 374.66  778.92
## 2006 34.82  68.89 105.97 374.68  781.57
log.times <- log10(times)
log.times
##         X500m   X1000m   X1500m   X5000m  X10000m
## 1976 1.592954 1.899383 2.076932 2.647852 2.949678
## 1980 1.580126 1.876102 2.062356 2.625611 2.938585
## 1984 1.581950 1.879669 2.073205 2.635765 2.944433
## 1988 1.561698 1.863501 2.049451 2.607058 2.918135
## 1992 1.569842 1.874192 2.059980 2.623218 2.930501
## 1994 1.560265 1.859918 2.046456 2.596553 2.908780
## 1998 1.551328 1.849051 2.032901 2.582291 2.900547
## 2002 1.536811 1.827240 2.016824 2.573637 2.891493
## 2006 1.541829 1.838156 2.025183 2.573661 2.892968
additive.fit <- medpolish(log.times)
## 1: 0.135343
## 2: 0.1275123
## Final: 0.1272076
additive.fit
## 
## Median Polish Results (Dataset: "log.times")
## 
## Overall: 2.049451
## 
## Row Effects:
##         1976         1980         1984         1988         1992         1994 
##  0.029706154  0.014653710  0.023754338  0.000000000  0.010529099 -0.005207217 
##         1998         2002         2006 
## -0.016549939 -0.032626124 -0.025345116 
## 
## Column Effects:
##      X500m     X1000m     X1500m     X5000m    X10000m 
## -0.4839780 -0.1859493  0.0000000  0.5576075  0.8705210 
## 
## Residuals:
##           X500m      X1000m     X1500m      X5000m     X10000m
## 1976 -0.0022252  0.00617525 -0.0022252  0.01108799  0.00000000
## 1980  0.0000000 -0.00205269 -0.0017480  0.00389901  0.00395939
## 1984 -0.0072773 -0.00758643  0.0000000  0.00495273  0.00070732
## 1988 -0.0037751  0.00000000  0.0000000  0.00000000 -0.00183643
## 1992 -0.0061598  0.00016141  0.0000000  0.00563109  0.00000000
## 1994  0.0000000  0.00162440  0.0022127 -0.00529775 -0.00598463
## 1998  0.0024053  0.00209933  0.0000000 -0.00821746 -0.00287436
## 2002  0.0039644 -0.00363518  0.0000000 -0.00079463  0.00414732
## 2006  0.0017013  0.00000000  0.0010774 -0.00805245 -0.00165866
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual

COMMON; ROW; COL
## [1] 112.06
##      1976      1980      1984      1988      1992      1994      1998      2002 
## 1.0707946 1.0343171 1.0562199 1.0000000 1.0245404 0.9880815 0.9626093 0.9276281 
##      2006 
## 0.9433110
##     X500m    X1000m    X1500m    X5000m   X10000m 
## 0.3281119 0.6517044 1.0000000 3.6108335 7.4220016

Question 3

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("church.2way")
head(church.2way)
##   month y1993 y1994 y1995 y1996
## 1   Jan   336   335   394   381
## 2   Feb   295   369   358   385
## 3   Mar   308   395   364   418
## 4   Apr   321   420   388   413
## 5   May   323   399   379   393
## 6  June   305   355   343   375
church.2way.num <- church.2way %>%
  select(where(is.numeric)) %>%
  as.matrix()
(additive.fit <- medpolish(church.2way.num))
## 1: 786
## 2: 739.25
## Final: 735
## 
## Median Polish Results (Dataset: "church.2way.num")
## 
## Overall: 370.5156
## 
## Row Effects:
##  [1]  -2.203125 -12.484375   7.015625  16.515625   2.015625 -20.703125
##  [7] -59.734375 -44.484375  -2.015625  11.296875  27.015625  22.984375
## 
## Column Effects:
##      y1993      y1994      y1995      y1996 
## -45.296875  -4.140625   3.718750  25.578125 
## 
## Residuals:
##           y1993    y1994    y1995     y1996
##  [1,]  12.98438 -29.1719  21.9688 -12.89062
##  [2,] -17.73438  15.1094  -3.7500   1.39062
##  [3,] -24.23438  21.6094 -17.2500  14.89062
##  [4,] -20.73438  37.1094  -2.7500   0.39062
##  [5,]  -4.23438  30.6094   2.7500  -5.10938
##  [6,]   0.48438   9.3281 -10.5312  -0.39062
##  [7,]  -0.48438   3.3594  -5.5000   1.64062
##  [8,]  20.26562 -20.8906  14.2500 -16.60938
##  [9,] -35.20312 -26.3594  26.7812  34.92188
## [10,]   8.48438 -28.6719  16.4688  -8.39062
## [11,]  52.76562 -17.3906  -3.2500   0.89062
## [12,]  29.79688  -3.3594   3.7812 -38.07812
church.2way.num <- church.2way.num[order(additive.fit$row), ]
additive.fit <- medpolish(church.2way.num)
## 1: 786
## 2: 739.25
## Final: 735
resids <- additive.fit$residuals
cv <- with(additive.fit, outer(row, col, "*") / overall)

df <- data.frame(
  Comparison_Value = as.vector(cv),
  Residual = as.vector(additive.fit$residuals))
ggplot(df, aes(Comparison_Value, Residual)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  labs(title = "Residuals vs Comparison Values (Additive Fit)",
       x = "Comparison Value (CV)",
       y = "Residual")

The extended fit decomposes the data into a common. Extended fit is needed because attendance patterns vary by year and month individually. A few residuals are higher and lower than expected. With that being said, there is no strong pattern in the residuals.