## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Question 1

purchases <- data.frame(
  Age = c("18-29", "30-44", "45-59", "60+"),
  Electronics = c(42, 57, 33, 21),
  Clothing = c(55, 48, 29, 18),
  HomeGoods = c(28, 39, 41, 26),
  Sports = c(31, 44, 22, 15)
)

purchases.table <- as.matrix(purchases[, -1])
rownames(purchases.table) <- purchases$Age

purchases.table
##       Electronics Clothing HomeGoods Sports
## 18-29          42       55        28     31
## 30-44          57       48        39     44
## 45-59          33       29        41     22
## 60+            21       18        26     15
additive.fit <- medpolish(purchases.table)
## 1: 77
## 2: 69
## Final: 69
additive.fit
## 
## Median Polish Results (Dataset: "purchases.table")
## 
## Overall: 32.29688
## 
## Row Effects:
##      18-29      30-44      45-59        60+ 
##   4.015625  15.828125  -4.015625 -13.671875 
## 
## Column Effects:
## Electronics    Clothing   HomeGoods      Sports 
##    5.203125    0.296875   -0.468750   -4.718750 
## 
## Residuals:
##       Electronics Clothing HomeGoods   Sports
## 18-29     0.48438 18.39062   -7.8438 -0.59375
## 30-44     3.67188 -0.42188   -8.6562  0.59375
## 45-59    -0.48438  0.42188   13.1875 -1.56250
## 60+      -2.82812 -0.92188    7.8438  1.09375
plot(additive.fit)

By looking at the row effects, we see that the 30-44 age group purchases the most items, while the 60+ group purchases the fewest items. In terms of the column effects, we see that electronics are most commonly purchased, and sporting goods are the least commonly purchased.

log.purchases <- log10(purchases.table)
log.purchases
##       Electronics Clothing HomeGoods   Sports
## 18-29    1.623249 1.740363  1.447158 1.491362
## 30-44    1.755875 1.681241  1.591065 1.643453
## 45-59    1.518514 1.462398  1.612784 1.342423
## 60+      1.322219 1.255273  1.414973 1.176091
additive.fit <- medpolish(log.purchases)
## 1: 0.9672582
## 2: 0.8084129
## Final: 0.8084129
additive.fit
## 
## Median Polish Results (Dataset: "log.purchases")
## 
## Overall: 1.527827
## 
## Row Effects:
##       18-29       30-44       45-59         60+ 
##  0.05335298  0.17474910 -0.05335298 -0.25479723 
## 
## Column Effects:
## Electronics    Clothing   HomeGoods      Sports 
##  0.04661499 -0.01491638  0.01339944 -0.09337813 
## 
## Residuals:
##       Electronics   Clothing HomeGoods     Sports
## 18-29  -0.0045454  0.1740993  -0.14742  0.0035601
## 30-44   0.0066840 -0.0064182  -0.12491  0.0342550
## 45-59  -0.0025748  0.0028406   0.12491 -0.0386729
## 60+     0.0025748 -0.0028406   0.12854 -0.0035601
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual

COMMON; ROW; COL
## [1] 33.71528
##     18-29     30-44     45-59       60+ 
## 1.1307146 1.4953715 0.8843965 0.5561639
## Electronics    Clothing   HomeGoods      Sports 
##   1.1133071   0.9662369   1.0313342   0.8065325

In terms of this data set, the additive fit gives a clearer look at the effects of each column and row.

Question 2: olympics.speed.skating multiplicative fit

head(olympics.speed.skating)
##   YEAR X500m X1000m X1500m X5000m X10000m
## 1 1976 39.17  79.32 119.38 444.48  890.59
## 2 1980 38.03  75.18 115.44 422.29  868.13
## 3 1984 38.19  75.80 118.36 432.28  879.90
## 4 1988 36.45  73.03 112.06 404.63  828.20
## 5 1992 37.14  74.85 114.81 419.97  852.12
## 6 1994 36.33  72.43 111.29 394.96  810.55
skating <- olympics.speed.skating[, -1]
rownames(skating) <- olympics.speed.skating[, 1]
skating
##      X500m X1000m X1500m X5000m X10000m
## 1976 39.17  79.32 119.38 444.48  890.59
## 1980 38.03  75.18 115.44 422.29  868.13
## 1984 38.19  75.80 118.36 432.28  879.90
## 1988 36.45  73.03 112.06 404.63  828.20
## 1992 37.14  74.85 114.81 419.97  852.12
## 1994 36.33  72.43 111.29 394.96  810.55
## 1998 35.59  70.64 107.87 382.20  795.33
## 2002 34.42  67.18 103.95 374.66  778.92
## 2006 34.82  68.89 105.97 374.68  781.57
log.skating <- log10(skating)
log.skating
##         X500m   X1000m   X1500m   X5000m  X10000m
## 1976 1.592954 1.899383 2.076932 2.647852 2.949678
## 1980 1.580126 1.876102 2.062356 2.625611 2.938585
## 1984 1.581950 1.879669 2.073205 2.635765 2.944433
## 1988 1.561698 1.863501 2.049451 2.607058 2.918135
## 1992 1.569842 1.874192 2.059980 2.623218 2.930501
## 1994 1.560265 1.859918 2.046456 2.596553 2.908780
## 1998 1.551328 1.849051 2.032901 2.582291 2.900547
## 2002 1.536811 1.827240 2.016824 2.573637 2.891493
## 2006 1.541829 1.838156 2.025183 2.573661 2.892968
additive.fit <- medpolish(log.skating)
## 1: 0.135343
## 2: 0.1275123
## Final: 0.1272076
additive.fit
## 
## Median Polish Results (Dataset: "log.skating")
## 
## Overall: 2.049451
## 
## Row Effects:
##         1976         1980         1984         1988         1992         1994 
##  0.029706154  0.014653710  0.023754338  0.000000000  0.010529099 -0.005207217 
##         1998         2002         2006 
## -0.016549939 -0.032626124 -0.025345116 
## 
## Column Effects:
##      X500m     X1000m     X1500m     X5000m    X10000m 
## -0.4839780 -0.1859493  0.0000000  0.5576075  0.8705210 
## 
## Residuals:
##           X500m      X1000m     X1500m      X5000m     X10000m
## 1976 -0.0022252  0.00617525 -0.0022252  0.01108799  0.00000000
## 1980  0.0000000 -0.00205269 -0.0017480  0.00389901  0.00395939
## 1984 -0.0072773 -0.00758643  0.0000000  0.00495273  0.00070732
## 1988 -0.0037751  0.00000000  0.0000000  0.00000000 -0.00183643
## 1992 -0.0061598  0.00016141  0.0000000  0.00563109  0.00000000
## 1994  0.0000000  0.00162440  0.0022127 -0.00529775 -0.00598463
## 1998  0.0024053  0.00209933  0.0000000 -0.00821746 -0.00287436
## 2002  0.0039644 -0.00363518  0.0000000 -0.00079463  0.00414732
## 2006  0.0017013  0.00000000  0.0010774 -0.00805245 -0.00165866
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- additive.fit$residual

COMMON; ROW; COL
## [1] 112.06
##      1976      1980      1984      1988      1992      1994      1998      2002 
## 1.0707946 1.0343171 1.0562199 1.0000000 1.0245404 0.9880815 0.9626093 0.9276281 
##      2006 
## 0.9433110
##     X500m    X1000m    X1500m    X5000m   X10000m 
## 0.3281119 0.6517044 1.0000000 3.6108335 7.4220016

Question 3: church.2way

For the following data, analyze using an extended fit. Interpret the fit and the residuals.

head(church.2way)
##   month y1993 y1994 y1995 y1996
## 1   Jan   336   335   394   381
## 2   Feb   295   369   358   385
## 3   Mar   308   395   364   418
## 4   Apr   321   420   388   413
## 5   May   323   399   379   393
## 6  June   305   355   343   375
attendance <- church.2way[, -1]
row.names(attendance) <- church.2way[, 1]
attendance
##      y1993 y1994 y1995 y1996
## Jan    336   335   394   381
## Feb    295   369   358   385
## Mar    308   395   364   418
## Apr    321   420   388   413
## May    323   399   379   393
## June   305   355   343   375
## July   265   310   309   338
## Aug    301   301   344   335
## Sept   288   338   399   429
## Oct    345   349   402   399
## Nov    405   376   398   424
## Dec    378   386   401   381
additive.fit <- medpolish(attendance)
## 1: 786
## 2: 739.25
## Final: 735
attendance <- attendance[order(additive.fit$row),]
additive.fit <- medpolish(attendance)
## 1: 786
## 2: 739.25
## Final: 735
additive.fit$residuals
##           y1993      y1994     y1995      y1996
## July  -0.484375   3.359375  -5.50000   1.640625
## Aug   20.265625 -20.890625  14.25000 -16.609375
## June   0.484375   9.328125 -10.53125  -0.390625
## Feb  -17.734375  15.109375  -3.75000   1.390625
## Jan   12.984375 -29.171875  21.96875 -12.890625
## Sept -35.203125 -26.359375  26.78125  34.921875
## May   -4.234375  30.609375   2.75000  -5.109375
## Mar  -24.234375  21.609375 -17.25000  14.890625
## Oct    8.484375 -28.671875  16.46875  -8.390625
## Apr  -20.734375  37.109375  -2.75000   0.390625
## Dec   29.796875  -3.359375   3.78125 -38.078125
## Nov   52.765625 -17.390625  -3.25000   0.890625
cv <- with(additive.fit,
           outer(row, col, "*")/overall)
df <- data.frame(Comparison_Value = as.vector(cv),
                 Residual = as.vector(additive.fit$residuals))
ggplot(df, aes(Comparison_Value, Residual)) + geom_point()