library(LearnEDAfunctions)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
transport <- data.frame(
Age.Group = c("18-29", "30-49", "50-64", "65+"),
Car = c(85, 110, 90, 60),
Bus = c(45, 35, 50, 80),
Train = c(30, 55, 75, 40),
Bike = c(60, 25, 10, 5))
transport.table <- as.matrix(transport[, -1])
rownames(transport.table) <- transport$Age.Group
transport.table
## Car Bus Train Bike
## 18-29 85 45 30 60
## 30-49 110 35 55 25
## 50-64 90 50 75 10
## 65+ 60 80 40 5
additive.fit <- medpolish(transport.table)
## 1: 245
## 2: 195
## Final: 195
additive.fit
##
## Median Polish Results (Dataset: "transport.table")
##
## Overall: 50.625
##
## Row Effects:
## 18-29 30-49 50-64 65+
## -0.9375 0.9375 4.0625 -16.5625
##
## Column Effects:
## Car Bus Train Bike
## 35.3125 -4.6875 4.6875 -27.8125
##
## Residuals:
## Car Bus Train Bike
## 18-29 0.000 0.000 -24.375 38.125
## 30-49 23.125 -11.875 -1.250 1.250
## 50-64 0.000 0.000 15.625 -16.875
## 65+ -9.375 50.625 1.250 -1.250
plot(additive.fit)
log.transport <- log10(transport.table)
log.transport
## Car Bus Train Bike
## 18-29 1.929419 1.653213 1.477121 1.778151
## 30-49 2.041393 1.544068 1.740363 1.397940
## 50-64 1.954243 1.698970 1.875061 1.000000
## 65+ 1.778151 1.903090 1.602060 0.698970
additive.fit <- medpolish(log.transport)
## 1: 2.803494
## 2: 2.379956
## Final: 2.379956
additive.fit
##
## Median Polish Results (Dataset: "log.transport")
##
## Overall: 1.7017
##
## Row Effects:
## 18-29 30-49 50-64 65+
## -0.01764527 0.05630762 0.01764527 -0.14446445
##
## Column Effects:
## Car Bus Train Bike
## 0.24013038 -0.02560908 0.01358942 -0.53970678
##
## Residuals:
## Car Bus Train Bike
## 18-29 0.0052335 -0.0052335 -0.220523 0.63380
## 30-49 0.0432544 -0.1883308 -0.031235 0.17964
## 50-64 -0.0052335 0.0052335 0.142126 -0.17964
## 65+ -0.0192150 0.3714632 0.031235 -0.31856
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual
COMMON; ROW; COL
## [1] 50.31533
## 18-29 30-49 50-64 65+
## 0.9601846 1.1384334 1.0414664 0.7170271
## Car Bus Train Bike
## 1.7383226 0.9427378 1.0317855 0.2885979
A multiplicative fit is more suitable for my data. This model provideds a more accurate representation of how age effects transportation.
data("olympics.speed.skating")
head(olympics.speed.skating)
## YEAR X500m X1000m X1500m X5000m X10000m
## 1 1976 39.17 79.32 119.38 444.48 890.59
## 2 1980 38.03 75.18 115.44 422.29 868.13
## 3 1984 38.19 75.80 118.36 432.28 879.90
## 4 1988 36.45 73.03 112.06 404.63 828.20
## 5 1992 37.14 74.85 114.81 419.97 852.12
## 6 1994 36.33 72.43 111.29 394.96 810.55
times <- olympics.speed.skating[, -1]
rownames(times) <- olympics.speed.skating[, 1]
times
## X500m X1000m X1500m X5000m X10000m
## 1976 39.17 79.32 119.38 444.48 890.59
## 1980 38.03 75.18 115.44 422.29 868.13
## 1984 38.19 75.80 118.36 432.28 879.90
## 1988 36.45 73.03 112.06 404.63 828.20
## 1992 37.14 74.85 114.81 419.97 852.12
## 1994 36.33 72.43 111.29 394.96 810.55
## 1998 35.59 70.64 107.87 382.20 795.33
## 2002 34.42 67.18 103.95 374.66 778.92
## 2006 34.82 68.89 105.97 374.68 781.57
log.times <- log10(times)
log.times
## X500m X1000m X1500m X5000m X10000m
## 1976 1.592954 1.899383 2.076932 2.647852 2.949678
## 1980 1.580126 1.876102 2.062356 2.625611 2.938585
## 1984 1.581950 1.879669 2.073205 2.635765 2.944433
## 1988 1.561698 1.863501 2.049451 2.607058 2.918135
## 1992 1.569842 1.874192 2.059980 2.623218 2.930501
## 1994 1.560265 1.859918 2.046456 2.596553 2.908780
## 1998 1.551328 1.849051 2.032901 2.582291 2.900547
## 2002 1.536811 1.827240 2.016824 2.573637 2.891493
## 2006 1.541829 1.838156 2.025183 2.573661 2.892968
additive.fit <- medpolish(log.times)
## 1: 0.135343
## 2: 0.1275123
## Final: 0.1272076
additive.fit
##
## Median Polish Results (Dataset: "log.times")
##
## Overall: 2.049451
##
## Row Effects:
## 1976 1980 1984 1988 1992 1994
## 0.029706154 0.014653710 0.023754338 0.000000000 0.010529099 -0.005207217
## 1998 2002 2006
## -0.016549939 -0.032626124 -0.025345116
##
## Column Effects:
## X500m X1000m X1500m X5000m X10000m
## -0.4839780 -0.1859493 0.0000000 0.5576075 0.8705210
##
## Residuals:
## X500m X1000m X1500m X5000m X10000m
## 1976 -0.0022252 0.00617525 -0.0022252 0.01108799 0.00000000
## 1980 0.0000000 -0.00205269 -0.0017480 0.00389901 0.00395939
## 1984 -0.0072773 -0.00758643 0.0000000 0.00495273 0.00070732
## 1988 -0.0037751 0.00000000 0.0000000 0.00000000 -0.00183643
## 1992 -0.0061598 0.00016141 0.0000000 0.00563109 0.00000000
## 1994 0.0000000 0.00162440 0.0022127 -0.00529775 -0.00598463
## 1998 0.0024053 0.00209933 0.0000000 -0.00821746 -0.00287436
## 2002 0.0039644 -0.00363518 0.0000000 -0.00079463 0.00414732
## 2006 0.0017013 0.00000000 0.0010774 -0.00805245 -0.00165866
COMMON <- 10 ^ additive.fit$overall
ROW <- 10 ^ additive.fit$row
COL <- 10 ^ additive.fit$col
RESIDUAL <- 10 ^ additive.fit$residual
COMMON; ROW; COL
## [1] 112.06
## 1976 1980 1984 1988 1992 1994 1998 2002
## 1.0707946 1.0343171 1.0562199 1.0000000 1.0245404 0.9880815 0.9626093 0.9276281
## 2006
## 0.9433110
## X500m X1000m X1500m X5000m X10000m
## 0.3281119 0.6517044 1.0000000 3.6108335 7.4220016
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data("church.2way")
head(church.2way)
## month y1993 y1994 y1995 y1996
## 1 Jan 336 335 394 381
## 2 Feb 295 369 358 385
## 3 Mar 308 395 364 418
## 4 Apr 321 420 388 413
## 5 May 323 399 379 393
## 6 June 305 355 343 375
church.2way.num <- church.2way %>%
select(where(is.numeric)) %>%
as.matrix()
(additive.fit <- medpolish(church.2way.num))
## 1: 786
## 2: 739.25
## Final: 735
##
## Median Polish Results (Dataset: "church.2way.num")
##
## Overall: 370.5156
##
## Row Effects:
## [1] -2.203125 -12.484375 7.015625 16.515625 2.015625 -20.703125
## [7] -59.734375 -44.484375 -2.015625 11.296875 27.015625 22.984375
##
## Column Effects:
## y1993 y1994 y1995 y1996
## -45.296875 -4.140625 3.718750 25.578125
##
## Residuals:
## y1993 y1994 y1995 y1996
## [1,] 12.98438 -29.1719 21.9688 -12.89062
## [2,] -17.73438 15.1094 -3.7500 1.39062
## [3,] -24.23438 21.6094 -17.2500 14.89062
## [4,] -20.73438 37.1094 -2.7500 0.39062
## [5,] -4.23438 30.6094 2.7500 -5.10938
## [6,] 0.48438 9.3281 -10.5312 -0.39062
## [7,] -0.48438 3.3594 -5.5000 1.64062
## [8,] 20.26562 -20.8906 14.2500 -16.60938
## [9,] -35.20312 -26.3594 26.7812 34.92188
## [10,] 8.48438 -28.6719 16.4688 -8.39062
## [11,] 52.76562 -17.3906 -3.2500 0.89062
## [12,] 29.79688 -3.3594 3.7812 -38.07812
church.2way.num <- church.2way.num[order(additive.fit$row), ]
additive.fit <- medpolish(church.2way.num)
## 1: 786
## 2: 739.25
## Final: 735
resids <- additive.fit$residuals
cv <- with(additive.fit, outer(row, col, "*") / overall)
df <- data.frame(
Comparison_Value = as.vector(cv),
Residual = as.vector(additive.fit$residuals))
ggplot(df, aes(Comparison_Value, Residual)) +
geom_point() +
geom_hline(yintercept = 0, color = "red") +
labs(title = "Residuals vs Comparison Values (Additive Fit)",
x = "Comparison Value (CV)",
y = "Residual")
The extended fit decomposes the data into a common. Extended fit is
needed because attendance patterns vary by year and month individually.
A few residuals are higher and lower than expected. With that being
said, there is no strong pattern in the residuals.