## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
purchases <- data.frame(
Age = c("18-29", "30-44", "45-59", "60+"),
Electronics = c(42, 57, 33, 21),
Clothing = c(55, 48, 29, 18),
HomeGoods = c(28, 39, 41, 26),
Sports = c(31, 44, 22, 15)
)
purchases.table <- as.matrix(purchases[, -1])
rownames(purchases.table) <- purchases$Age
purchases.table
## Electronics Clothing HomeGoods Sports
## 18-29 42 55 28 31
## 30-44 57 48 39 44
## 45-59 33 29 41 22
## 60+ 21 18 26 15
additive.fit <- medpolish(purchases.table)
## 1: 77
## 2: 69
## Final: 69
additive.fit
##
## Median Polish Results (Dataset: "purchases.table")
##
## Overall: 32.29688
##
## Row Effects:
## 18-29 30-44 45-59 60+
## 4.015625 15.828125 -4.015625 -13.671875
##
## Column Effects:
## Electronics Clothing HomeGoods Sports
## 5.203125 0.296875 -0.468750 -4.718750
##
## Residuals:
## Electronics Clothing HomeGoods Sports
## 18-29 0.48438 18.39062 -7.8438 -0.59375
## 30-44 3.67188 -0.42188 -8.6562 0.59375
## 45-59 -0.48438 0.42188 13.1875 -1.56250
## 60+ -2.82812 -0.92188 7.8438 1.09375
plot(additive.fit)
By looking at the row effects, we see that the 30-44 age group purchases
the most items, while the 60+ group purchases the fewest items. In terms
of the column effects, we see that electronics are most commonly
purchased, and sporting goods are the least commonly purchased.
log.purchases <- log10(purchases.table)
log.purchases
## Electronics Clothing HomeGoods Sports
## 18-29 1.623249 1.740363 1.447158 1.491362
## 30-44 1.755875 1.681241 1.591065 1.643453
## 45-59 1.518514 1.462398 1.612784 1.342423
## 60+ 1.322219 1.255273 1.414973 1.176091
additive.fit <- medpolish(log.purchases)
## 1: 0.9672582
## 2: 0.8084129
## Final: 0.8084129
additive.fit
##
## Median Polish Results (Dataset: "log.purchases")
##
## Overall: 1.527827
##
## Row Effects:
## 18-29 30-44 45-59 60+
## 0.05335298 0.17474910 -0.05335298 -0.25479723
##
## Column Effects:
## Electronics Clothing HomeGoods Sports
## 0.04661499 -0.01491638 0.01339944 -0.09337813
##
## Residuals:
## Electronics Clothing HomeGoods Sports
## 18-29 -0.0045454 0.1740993 -0.14742 0.0035601
## 30-44 0.0066840 -0.0064182 -0.12491 0.0342550
## 45-59 -0.0025748 0.0028406 0.12491 -0.0386729
## 60+ 0.0025748 -0.0028406 0.12854 -0.0035601
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual
COMMON; ROW; COL
## [1] 33.71528
## 18-29 30-44 45-59 60+
## 1.1307146 1.4953715 0.8843965 0.5561639
## Electronics Clothing HomeGoods Sports
## 1.1133071 0.9662369 1.0313342 0.8065325
In terms of this data set, the additive fit gives a clearer look at the effects of each column and row.
head(olympics.speed.skating)
## YEAR X500m X1000m X1500m X5000m X10000m
## 1 1976 39.17 79.32 119.38 444.48 890.59
## 2 1980 38.03 75.18 115.44 422.29 868.13
## 3 1984 38.19 75.80 118.36 432.28 879.90
## 4 1988 36.45 73.03 112.06 404.63 828.20
## 5 1992 37.14 74.85 114.81 419.97 852.12
## 6 1994 36.33 72.43 111.29 394.96 810.55
skating <- olympics.speed.skating[, -1]
rownames(skating) <- olympics.speed.skating[, 1]
skating
## X500m X1000m X1500m X5000m X10000m
## 1976 39.17 79.32 119.38 444.48 890.59
## 1980 38.03 75.18 115.44 422.29 868.13
## 1984 38.19 75.80 118.36 432.28 879.90
## 1988 36.45 73.03 112.06 404.63 828.20
## 1992 37.14 74.85 114.81 419.97 852.12
## 1994 36.33 72.43 111.29 394.96 810.55
## 1998 35.59 70.64 107.87 382.20 795.33
## 2002 34.42 67.18 103.95 374.66 778.92
## 2006 34.82 68.89 105.97 374.68 781.57
log.skating <- log10(skating)
log.skating
## X500m X1000m X1500m X5000m X10000m
## 1976 1.592954 1.899383 2.076932 2.647852 2.949678
## 1980 1.580126 1.876102 2.062356 2.625611 2.938585
## 1984 1.581950 1.879669 2.073205 2.635765 2.944433
## 1988 1.561698 1.863501 2.049451 2.607058 2.918135
## 1992 1.569842 1.874192 2.059980 2.623218 2.930501
## 1994 1.560265 1.859918 2.046456 2.596553 2.908780
## 1998 1.551328 1.849051 2.032901 2.582291 2.900547
## 2002 1.536811 1.827240 2.016824 2.573637 2.891493
## 2006 1.541829 1.838156 2.025183 2.573661 2.892968
additive.fit <- medpolish(log.skating)
## 1: 0.135343
## 2: 0.1275123
## Final: 0.1272076
additive.fit
##
## Median Polish Results (Dataset: "log.skating")
##
## Overall: 2.049451
##
## Row Effects:
## 1976 1980 1984 1988 1992 1994
## 0.029706154 0.014653710 0.023754338 0.000000000 0.010529099 -0.005207217
## 1998 2002 2006
## -0.016549939 -0.032626124 -0.025345116
##
## Column Effects:
## X500m X1000m X1500m X5000m X10000m
## -0.4839780 -0.1859493 0.0000000 0.5576075 0.8705210
##
## Residuals:
## X500m X1000m X1500m X5000m X10000m
## 1976 -0.0022252 0.00617525 -0.0022252 0.01108799 0.00000000
## 1980 0.0000000 -0.00205269 -0.0017480 0.00389901 0.00395939
## 1984 -0.0072773 -0.00758643 0.0000000 0.00495273 0.00070732
## 1988 -0.0037751 0.00000000 0.0000000 0.00000000 -0.00183643
## 1992 -0.0061598 0.00016141 0.0000000 0.00563109 0.00000000
## 1994 0.0000000 0.00162440 0.0022127 -0.00529775 -0.00598463
## 1998 0.0024053 0.00209933 0.0000000 -0.00821746 -0.00287436
## 2002 0.0039644 -0.00363518 0.0000000 -0.00079463 0.00414732
## 2006 0.0017013 0.00000000 0.0010774 -0.00805245 -0.00165866
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- additive.fit$residual
COMMON; ROW; COL
## [1] 112.06
## 1976 1980 1984 1988 1992 1994 1998 2002
## 1.0707946 1.0343171 1.0562199 1.0000000 1.0245404 0.9880815 0.9626093 0.9276281
## 2006
## 0.9433110
## X500m X1000m X1500m X5000m X10000m
## 0.3281119 0.6517044 1.0000000 3.6108335 7.4220016
For the following data, analyze using an extended fit. Interpret the fit and the residuals.
head(church.2way)
## month y1993 y1994 y1995 y1996
## 1 Jan 336 335 394 381
## 2 Feb 295 369 358 385
## 3 Mar 308 395 364 418
## 4 Apr 321 420 388 413
## 5 May 323 399 379 393
## 6 June 305 355 343 375
attendance <- church.2way[, -1]
row.names(attendance) <- church.2way[, 1]
attendance
## y1993 y1994 y1995 y1996
## Jan 336 335 394 381
## Feb 295 369 358 385
## Mar 308 395 364 418
## Apr 321 420 388 413
## May 323 399 379 393
## June 305 355 343 375
## July 265 310 309 338
## Aug 301 301 344 335
## Sept 288 338 399 429
## Oct 345 349 402 399
## Nov 405 376 398 424
## Dec 378 386 401 381
additive.fit <- medpolish(attendance)
## 1: 786
## 2: 739.25
## Final: 735
attendance <- attendance[order(additive.fit$row),]
additive.fit <- medpolish(attendance)
## 1: 786
## 2: 739.25
## Final: 735
additive.fit$residuals
## y1993 y1994 y1995 y1996
## July -0.484375 3.359375 -5.50000 1.640625
## Aug 20.265625 -20.890625 14.25000 -16.609375
## June 0.484375 9.328125 -10.53125 -0.390625
## Feb -17.734375 15.109375 -3.75000 1.390625
## Jan 12.984375 -29.171875 21.96875 -12.890625
## Sept -35.203125 -26.359375 26.78125 34.921875
## May -4.234375 30.609375 2.75000 -5.109375
## Mar -24.234375 21.609375 -17.25000 14.890625
## Oct 8.484375 -28.671875 16.46875 -8.390625
## Apr -20.734375 37.109375 -2.75000 0.390625
## Dec 29.796875 -3.359375 3.78125 -38.078125
## Nov 52.765625 -17.390625 -3.25000 0.890625
cv <- with(additive.fit,
outer(row, col, "*")/overall)
df <- data.frame(Comparison_Value = as.vector(cv),
Residual = as.vector(additive.fit$residuals))
ggplot(df, aes(Comparison_Value, Residual)) + geom_point()