## year month GS IP
## Min. :1974 Min. : 3.0 Min. : 2 Min. : 17
## 1st Qu.:1984 1st Qu.: 5.0 1st Qu.:663 1st Qu.:5928
## Median :1995 Median : 7.0 Median :732 Median :6531
## Mean :1994 Mean : 6.7 Mean :640 Mean :5719
## 3rd Qu.:2004 3rd Qu.: 8.0 3rd Qu.:795 3rd Qu.:7096
## Max. :2014 Max. :10.0 Max. :872 Max. :7763
##
## TBF H HR R
## Min. : 70 Min. : 14 Min. : 1 Min. : 5
## 1st Qu.:25388 1st Qu.:5770 1st Qu.: 464 1st Qu.:2720
## Median :27906 Median :6528 Median : 634 Median :3216
## Mean :24550 Mean :5714 Mean : 590 Mean :2875
## 3rd Qu.:30581 3rd Qu.:7140 3rd Qu.: 811 3rd Qu.:3667
## Max. :33963 Max. :8124 Max. :1069 Max. :4330
##
## ER SO BB RA
## Min. : 4 Min. : 14 Min. : 3 Min. :1.93
## 1st Qu.:2394 1st Qu.:3351 1st Qu.:2139 1st Qu.:4.20
## Median :2909 Median :4144 Median :2384 Median :4.44
## Mean :2612 Mean :3891 Mean :2105 Mean :4.47
## 3rd Qu.:3355 3rd Qu.:5198 3rd Qu.:2600 3rd Qu.:4.79
## Max. :3989 Max. :6467 Max. :3137 Max. :6.17
##
## ERA HR.battedball GB. FB.
## Min. :1.54 Min. :0.0164 Min. :0.364 Min. :0.312
## 1st Qu.:3.79 1st Qu.:0.0324 1st Qu.:0.441 1st Qu.:0.344
## Median :4.04 Median :0.0368 Median :0.452 Median :0.368
## Mean :4.05 Mean :0.0367 Mean :0.466 Mean :0.382
## 3rd Qu.:4.37 3rd Qu.:0.0397 3rd Qu.:0.500 3rd Qu.:0.424
## Max. :6.17 Max. :0.0701 Max. :0.557 Max. :0.500
##
## LD. HR.FB K. BB.
## Min. :0.0098 Min. :0.0417 Min. :0.113 Min. :0.0208
## 1st Qu.:0.0704 1st Qu.:0.0814 1st Qu.:0.139 1st Qu.:0.0823
## Median :0.1922 Median :0.0967 Median :0.160 Median :0.0855
## Mean :0.1525 Mean :0.0969 Mean :0.159 Mean :0.0861
## 3rd Qu.:0.2105 3rd Qu.:0.1100 3rd Qu.:0.173 3rd Qu.:0.0890
## Max. :0.2955 Max. :0.1842 Max. :0.299 Max. :0.1856
##
## BABIP XBHrate X3b.XBH IFFB.
## Min. :0.189 Min. :0.133 Min. :0.0000 Min. :0.000
## 1st Qu.:0.298 1st Qu.:0.217 1st Qu.:0.0938 1st Qu.:0.148
## Median :0.306 Median :0.232 Median :0.1079 Median :0.281
## Mean :0.338 Mean :0.232 Mean :0.1154 Mean :0.243
## 3rd Qu.:0.392 3rd Qu.:0.248 3rd Qu.:0.1322 3rd Qu.:0.304
## Max. :0.497 Max. :0.440 Max. :1.0000 Max. :0.467
##
## SBArate SB.
## Min. :0.0000 0.6609 : 4
## 1st Qu.:0.0879 0.6667 : 3
## Median :0.1035 0.7141 : 3
## Mean :0.1025 0.0000 : 2
## 3rd Qu.:0.1166 0.6389 : 2
## Max. :0.3500 0.6663 : 2
## (Other):267
## [1] "year" "month" "GS" "IP"
## [5] "TBF" "ERA" "RA" "HR.battedball"
## [9] "GB." "FB." "LD." "HR.FB"
## [13] "K." "BB." "BABIP" "XBHrate"
## [17] "X3b.XBH" "IFFB." "SBArate" "SB."
## month PA R.game GB.BIP FB.BIP LD.BIP K BB HR.flyball babip
## 1 4 931502 0.5 1.2 -0.5 -2.8 -1.1 5.9 -1.7 -1.7
## 2 5 1167848 0.1 0.7 -0.1 -1.1 -0.8 1.8 0.2 -0.2
## 3 6 1147107 0.5 0.0 -0.1 -0.2 -0.9 -1.1 2.1 0.6
## 4 7 1121828 1.0 -0.3 -0.1 1.5 -1.0 -2.6 2.0 0.5
## 5 8 1215853 -0.1 -0.7 0.3 1.5 -0.2 -2.6 0.8 0.5
## 6 9 1149558 -1.4 -0.5 0.2 0.5 3.1 0.1 -2.9 0.0
## dt.sdt t.dt SBA.TOF SB.SBA
## 1 2.5 -0.9 3.2 -0.9
## 2 0.5 -0.8 3.4 -0.9
## 3 -0.2 -0.5 0.1 -0.3
## 4 -0.2 -0.8 -2.2 -0.3
## 5 -1.0 -0.2 -3.7 0.1
## 6 -1.1 2.3 -0.4 2.1
## split Games GB.BIP FB.BIP LD.BIP K BB HR.flyball babip dt.sdt
## 1 0 43563 0.4654 0.3810 0.1536 0.1584 0.0859 0.0982 0.3387 0.2330
## 2 1 45330 0.4659 0.3802 0.1540 0.1585 0.0854 0.0982 0.3387 0.2327
## t.dt SBA.TOF SB.SBA
## 1 0.1109 0.1015 0.6899
## 2 0.1123 0.1013 0.6897
# Annual time series covering 1974-2014, one per column of `season`
# (K., HR.FB and RA); frequency = 1 means one observation per year.
K.ts <- ts(season$K., start = 1974, end = 2014, frequency = 1)
HR.ts <- ts(season$HR.FB, start = 1974, end = 2014, frequency = 1)
R.ts <- ts(season$RA, start = 1974, end = 2014, frequency = 1)
# Exponential smoothing state space model for the K series.
fit <- ets(y = K.ts)
summary(fit)
## ETS(A,A,N)
##
## Call:
## ets(y = K.ts)
##
## Smoothing parameters:
## alpha = 0.8939
## beta = 1e-04
##
## Initial states:
## l = 0.1288
## b = 0.0018
##
## sigma: 0.0048
##
## AIC AICc BIC
## -277.9 -276.7 -271.0
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -8.662e-07 0.004783 0.003776 -0.1353 2.464 0.2118
# Two-step-ahead forecasts with 80% and 95% intervals.
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2051 0.1990 0.2112 0.1957 0.2145
## 2016 0.2069 0.1987 0.2151 0.1943 0.2195
# Ten-step-ahead forecast plot.
plot(forecast(fit, h = 10))
# ARIMA model with the order chosen by auto.arima().
fit <- auto.arima(K.ts)
summary(fit)
## Series: K.ts
## ARIMA(0,1,0)
##
## sigma^2 estimated as 2.7e-05: log likelihood=153.7
## AIC=-305.3 AICc=-305.2 BIC=-303.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001774 0.00513 0.004086 1.023 2.6 0.2291
# Two-step-ahead forecasts, then a ten-step-ahead plot.
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2036 0.1969 0.2103 0.1934 0.2138
## 2016 0.2036 0.1942 0.2130 0.1892 0.2180
plot(forecast(fit, h = 10))
# Reference: http://people.duke.edu/~rnau/411arim.htm
class(K.ts)
## [1] "ts"
# Random walk: ARIMA(0,1,0).
fit <- arima(K.ts, order = c(0, 1, 0))
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2036 0.1969 0.2103 0.1934 0.2138
## 2016 0.2036 0.1942 0.2130 0.1892 0.2180
plot(forecast(fit, h = 10))
# First-order autoregressive model, ARIMA(1,0,0), fitted by maximum likelihood.
fit <- arima(K.ts, order = c(1, 0, 0), method = "ML")
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2031 0.1964 0.2097 0.1929 0.2133
## 2016 0.2026 0.1933 0.2119 0.1883 0.2169
# Slight mean reversion, but ar1 is within 1 SE of a random walk (ar1 = 1).
summary(fit)
## Series: K.ts
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## 0.987 0.165
## s.e. 0.017 0.029
##
## sigma^2 estimated as 2.69e-05: log likelihood=155.7
## AIC=-305.5 AICc=-304.8 BIC=-300.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001519 0.005186 0.004227 0.8208 2.704 0.237
# Components stored on the fitted model object.
names(fit)
## [1] "coef" "sigma2" "var.coef" "mask" "loglik"
## [6] "aic" "arma" "residuals" "call" "series"
## [11] "code" "n.cond" "model"
# Differenced first-order autoregressive model: ARIMA(1,1,0).
fit <- arima(K.ts, order = c(1, 1, 0))
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2038 0.1971 0.2104 0.1936 0.2140
## 2016 0.2038 0.1942 0.2134 0.1891 0.2184
plot(forecast(fit, h = 10))
# Second-order autoregressive model, ARIMA(2,0,0), fitted by maximum likelihood.
# The second AR term is negative but within 1 SE of zero.
fit <- arima(K.ts, order = c(2, 0, 0), method = "ML")
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2033 0.1967 0.2099 0.1931 0.2135
## 2016 0.2028 0.1932 0.2123 0.1882 0.2173
# Slight mean reversion, within 1 SE of a random walk.
summary(fit)
## Series: K.ts
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 intercept
## 1.032 -0.046 0.165
## s.e. 0.157 0.159 0.028
##
## sigma^2 estimated as 2.68e-05: log likelihood=155.8
## AIC=-303.6 AICc=-302.4 BIC=-296.7
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001436 0.005181 0.004194 0.7709 2.686 0.2352
# Simple exponential smoothing expressed as ARIMA(0,1,1).
fit <- arima(K.ts, order = c(0, 1, 1))
forecast(fit, h = 2)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2038 0.1971 0.2104 0.1936 0.2139
## 2016 0.2038 0.1942 0.2133 0.1891 0.2184
plot(forecast(fit, h = 10))
summary(fit)
## Series: K.ts
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.033
## s.e. 0.151
##
## sigma^2 estimated as 2.69e-05: log likelihood=153.7
## AIC=-303.4 AICc=-303 BIC=-300
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001721 0.005127 0.004063 0.9922 2.587 0.2278
# Theta method forecast (ets with drift).
# thetaf() returns a forecast object directly, so the horizon is set through
# its own `h` argument. Wrapping the result in forecast(fit, 2) or
# forecast(fit, 10) does not change the horizon: the table below has ten rows
# either way. The horizon is therefore stated once, here, and the object is
# printed and plotted as-is.
fit <- thetaf(K.ts, h = 10)
fit
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2015 0.2045 0.1979 0.2110 0.1944 0.2145
## 2016 0.2053 0.1960 0.2146 0.1911 0.2195
## 2017 0.2062 0.1948 0.2176 0.1888 0.2236
## 2018 0.2070 0.1939 0.2202 0.1869 0.2271
## 2019 0.2079 0.1932 0.2226 0.1854 0.2304
## 2020 0.2088 0.1927 0.2249 0.1841 0.2334
## 2021 0.2096 0.1922 0.2270 0.1830 0.2362
## 2022 0.2105 0.1919 0.2291 0.1820 0.2389
## 2023 0.2113 0.1916 0.2311 0.1812 0.2415
## 2024 0.2122 0.1914 0.2330 0.1804 0.2440
plot(fit)
# Test on out-of-sample one-step forecasts: fit each model type on the first
# 20 observations, then reuse those fitted models (model = ...) on
# observations 21-40.
# NOTE(review): K.ts is built for 1974-2014 (41 annual values), so 21:40 here
# and 1:40 at the end of this section both leave out 2014 -- confirm that is
# intended.
f1 <- ets(K.ts[1:20])
f2 <- auto.arima(K.ts[1:20])
summary(f2)
## Series: K.ts[1:20]
## ARIMA(0,1,0)
##
## sigma^2 estimated as 2.81e-05: log likelihood=72.61
## AIC=-143.2 AICc=-143 BIC=-142.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0009915 0.005164 0.003762 0.6381 2.681 0.3951
f1.out <- ets(K.ts[21:40], model = f1)
f2.out <- Arima(K.ts[21:40], model = f2)
accuracy(f1.out)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002236 0.004872 0.004071 1.213 2.325 0.4721
accuracy(f2.out)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001998 0.00477 0.003968 1.087 2.276 0.4601
# Diebold-Mariano test; with alternative = "greater" the alternative
# hypothesis is that the second set of residuals (ARIMA) is more accurate.
# The recorded p-value (0.00088) rejects equal accuracy in favour of the
# ARIMA model, even though the RMSE/MAE gap above is small.
dm.test(residuals(f1.out), residuals(f2.out), h = 1, alternative = "greater")
##
## Diebold-Mariano Test
##
## data: residuals(f1.out)residuals(f2.out)
## DM = 3.636, Forecast horizon = 1, Loss function power = 2, p-value
## = 0.0008782
## alternative hypothesis: greater
# Refit both model types on the first 40 observations.
f1 <- ets(K.ts[1:40])
f2 <- auto.arima(K.ts[1:40])
# Convert the series to xts for dygraphs; K and HR are multiplied by 100.
K <- as.xts(K.ts * 100)
HR <- as.xts(HR.ts * 100)
RA <- as.xts(R.ts)
colnames(K) <- "K"
colnames(HR) <- "HR"
colnames(RA) <- "RA"
# Two-column objects for the paired charts.
KHR <- cbind(K, HR)
KR <- cbind(K, RA)
dygraph(RA)
# Assign the "K" series to the y2 axis.
dygraph(KHR, main = "K% Rolling Averages") %>%
  dySeries("K", axis = "y2") %>%
  dyRoller(rollPeriod = 1)
# K on the y2 axis against RA on the primary axis, with labelled axes.
dygraph(KR, main = "K% and RA by Season") %>%
  dyAxis("y", label = "RA") %>%
  dyAxis("y2", label = "K%", independentTicks = TRUE) %>%
  dySeries("K", axis = "y2") %>%
  dyLegend(width = 400) %>%
  dyRoller(rollPeriod = 1)
To do: compare models further. Use runs per game and other stats. Use monthly data and remove seasonal effects.