##       year          month            GS            IP      
##  Min.   :1974   Min.   : 3.0   Min.   :  2   Min.   :  17  
##  1st Qu.:1984   1st Qu.: 5.0   1st Qu.:663   1st Qu.:5928  
##  Median :1995   Median : 7.0   Median :732   Median :6531  
##  Mean   :1994   Mean   : 6.7   Mean   :640   Mean   :5719  
##  3rd Qu.:2004   3rd Qu.: 8.0   3rd Qu.:795   3rd Qu.:7096  
##  Max.   :2014   Max.   :10.0   Max.   :872   Max.   :7763  
##                                                            
##       TBF              H              HR             R       
##  Min.   :   70   Min.   :  14   Min.   :   1   Min.   :   5  
##  1st Qu.:25388   1st Qu.:5770   1st Qu.: 464   1st Qu.:2720  
##  Median :27906   Median :6528   Median : 634   Median :3216  
##  Mean   :24550   Mean   :5714   Mean   : 590   Mean   :2875  
##  3rd Qu.:30581   3rd Qu.:7140   3rd Qu.: 811   3rd Qu.:3667  
##  Max.   :33963   Max.   :8124   Max.   :1069   Max.   :4330  
##                                                              
##        ER             SO             BB             RA      
##  Min.   :   4   Min.   :  14   Min.   :   3   Min.   :1.93  
##  1st Qu.:2394   1st Qu.:3351   1st Qu.:2139   1st Qu.:4.20  
##  Median :2909   Median :4144   Median :2384   Median :4.44  
##  Mean   :2612   Mean   :3891   Mean   :2105   Mean   :4.47  
##  3rd Qu.:3355   3rd Qu.:5198   3rd Qu.:2600   3rd Qu.:4.79  
##  Max.   :3989   Max.   :6467   Max.   :3137   Max.   :6.17  
##                                                             
##       ERA       HR.battedball         GB.             FB.       
##  Min.   :1.54   Min.   :0.0164   Min.   :0.364   Min.   :0.312  
##  1st Qu.:3.79   1st Qu.:0.0324   1st Qu.:0.441   1st Qu.:0.344  
##  Median :4.04   Median :0.0368   Median :0.452   Median :0.368  
##  Mean   :4.05   Mean   :0.0367   Mean   :0.466   Mean   :0.382  
##  3rd Qu.:4.37   3rd Qu.:0.0397   3rd Qu.:0.500   3rd Qu.:0.424  
##  Max.   :6.17   Max.   :0.0701   Max.   :0.557   Max.   :0.500  
##                                                                 
##       LD.             HR.FB              K.             BB.        
##  Min.   :0.0098   Min.   :0.0417   Min.   :0.113   Min.   :0.0208  
##  1st Qu.:0.0704   1st Qu.:0.0814   1st Qu.:0.139   1st Qu.:0.0823  
##  Median :0.1922   Median :0.0967   Median :0.160   Median :0.0855  
##  Mean   :0.1525   Mean   :0.0969   Mean   :0.159   Mean   :0.0861  
##  3rd Qu.:0.2105   3rd Qu.:0.1100   3rd Qu.:0.173   3rd Qu.:0.0890  
##  Max.   :0.2955   Max.   :0.1842   Max.   :0.299   Max.   :0.1856  
##                                                                    
##      BABIP          XBHrate         X3b.XBH           IFFB.      
##  Min.   :0.189   Min.   :0.133   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.298   1st Qu.:0.217   1st Qu.:0.0938   1st Qu.:0.148  
##  Median :0.306   Median :0.232   Median :0.1079   Median :0.281  
##  Mean   :0.338   Mean   :0.232   Mean   :0.1154   Mean   :0.243  
##  3rd Qu.:0.392   3rd Qu.:0.248   3rd Qu.:0.1322   3rd Qu.:0.304  
##  Max.   :0.497   Max.   :0.440   Max.   :1.0000   Max.   :0.467  
##                                                                  
##     SBArate            SB.     
##  Min.   :0.0000   0.6609 :  4  
##  1st Qu.:0.0879   0.6667 :  3  
##  Median :0.1035   0.7141 :  3  
##  Mean   :0.1025   0.0000 :  2  
##  3rd Qu.:0.1166   0.6389 :  2  
##  Max.   :0.3500   0.6663 :  2  
##                   (Other):267
##  [1] "year"          "month"         "GS"            "IP"           
##  [5] "TBF"           "ERA"           "RA"            "HR.battedball"
##  [9] "GB."           "FB."           "LD."           "HR.FB"        
## [13] "K."            "BB."           "BABIP"         "XBHrate"      
## [17] "X3b.XBH"       "IFFB."         "SBArate"       "SB."
##   month      PA R.game GB.BIP FB.BIP LD.BIP    K   BB HR.flyball babip
## 1     4  931502    0.5    1.2   -0.5   -2.8 -1.1  5.9       -1.7  -1.7
## 2     5 1167848    0.1    0.7   -0.1   -1.1 -0.8  1.8        0.2  -0.2
## 3     6 1147107    0.5    0.0   -0.1   -0.2 -0.9 -1.1        2.1   0.6
## 4     7 1121828    1.0   -0.3   -0.1    1.5 -1.0 -2.6        2.0   0.5
## 5     8 1215853   -0.1   -0.7    0.3    1.5 -0.2 -2.6        0.8   0.5
## 6     9 1149558   -1.4   -0.5    0.2    0.5  3.1  0.1       -2.9   0.0
##   dt.sdt t.dt SBA.TOF SB.SBA
## 1    2.5 -0.9     3.2   -0.9
## 2    0.5 -0.8     3.4   -0.9
## 3   -0.2 -0.5     0.1   -0.3
## 4   -0.2 -0.8    -2.2   -0.3
## 5   -1.0 -0.2    -3.7    0.1
## 6   -1.1  2.3    -0.4    2.1
##   split Games GB.BIP FB.BIP LD.BIP      K     BB HR.flyball  babip dt.sdt
## 1     0 43563 0.4654 0.3810 0.1536 0.1584 0.0859     0.0982 0.3387 0.2330
## 2     1 45330 0.4659 0.3802 0.1540 0.1585 0.0854     0.0982 0.3387 0.2327
##     t.dt SBA.TOF SB.SBA
## 1 0.1109  0.1015 0.6899
## 2 0.1123  0.1013 0.6897

plot of chunk unnamed-chunk-4

plot of chunk unnamed-chunk-5

plot of chunk unnamed-chunk-6

# Annual series built from the `season` table: one value per year, 1974-2014.
K.ts <- ts(season$K., start = 1974, end = 2014, frequency = 1)
HR.ts <- ts(season$HR.FB, start = 1974, end = 2014, frequency = 1)
R.ts <- ts(season$RA, start = 1974, end = 2014, frequency = 1)

# Exponential smoothing state-space fit; ets() picks the model form itself
# (additive error, additive trend, no seasonality in the output below).
fit <- ets(K.ts)
summary(fit)
## ETS(A,A,N) 
## 
## Call:
##  ets(y = K.ts) 
## 
##   Smoothing parameters:
##     alpha = 0.8939 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 0.1288 
##     b = 0.0018 
## 
##   sigma:  0.0048
## 
##    AIC   AICc    BIC 
## -277.9 -276.7 -271.0 
## 
## Training set error measures:
##                      ME     RMSE      MAE     MPE  MAPE   MASE
## Training set -8.662e-07 0.004783 0.003776 -0.1353 2.464 0.2118
# Two-year-ahead point forecasts with 80% / 95% intervals.
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2051 0.1990 0.2112 0.1957 0.2145
## 2016         0.2069 0.1987 0.2151 0.1943 0.2195
# Ten-year horizon for the chart.
plot(forecast(fit, h = 10))

plot of chunk unnamed-chunk-7

# Automatic ARIMA order selection; the chosen model below is ARIMA(0,1,0),
# i.e. a random walk with no drift term.
fit <- auto.arima(K.ts)
summary(fit)
## Series: K.ts 
## ARIMA(0,1,0)                    
## 
## sigma^2 estimated as 2.7e-05:  log likelihood=153.7
## AIC=-305.3   AICc=-305.2   BIC=-303.6
## 
## Training set error measures:
##                    ME    RMSE      MAE   MPE MAPE   MASE
## Training set 0.001774 0.00513 0.004086 1.023  2.6 0.2291
# Point forecasts are flat at the last observed value, as expected for a
# random walk without drift.
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2036 0.1969 0.2103 0.1934 0.2138
## 2016         0.2036 0.1942 0.2130 0.1892 0.2180
plot(forecast(fit, h = 10))

plot of chunk unnamed-chunk-7

# Reference for the ARIMA variants tried below:
# http://people.duke.edu/~rnau/411arim.htm
class(K.ts)
## [1] "ts"
# ARIMA(0,1,0): random walk, specified by hand rather than via auto.arima().
fit <- arima(K.ts, order = c(0, 1, 0))
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2036 0.1969 0.2103 0.1934 0.2138
## 2016         0.2036 0.1942 0.2130 0.1892 0.2180
plot(forecast(fit, h = 10))

plot of chunk unnamed-chunk-8

# ARIMA(1,0,0): first-order autoregressive model, fitted by maximum likelihood.
fit <- arima(K.ts, order = c(1, 0, 0), method = "ML")
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2031 0.1964 0.2097 0.1929 0.2133
## 2016         0.2026 0.1933 0.2119 0.1883 0.2169
# ar1 = 0.987 (s.e. 0.017): slight mean reversion, but within 1 SE of a
# random walk (ar1 = 1).
summary(fit)
## Series: K.ts 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##         ar1  intercept
##       0.987      0.165
## s.e.  0.017      0.029
## 
## sigma^2 estimated as 2.69e-05:  log likelihood=155.7
## AIC=-305.5   AICc=-304.8   BIC=-300.3
## 
## Training set error measures:
##                    ME     RMSE      MAE    MPE  MAPE  MASE
## Training set 0.001519 0.005186 0.004227 0.8208 2.704 0.237
# Components available on the fitted arima object.
names(fit)
##  [1] "coef"      "sigma2"    "var.coef"  "mask"      "loglik"   
##  [6] "aic"       "arma"      "residuals" "call"      "series"   
## [11] "code"      "n.cond"    "model"
# ARIMA(1,1,0): differenced first-order autoregressive model.
fit <- arima(K.ts, order = c(1, 1, 0))
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2038 0.1971 0.2104 0.1936 0.2140
## 2016         0.2038 0.1942 0.2134 0.1891 0.2184
plot(forecast(fit, h = 10))

plot of chunk unnamed-chunk-8

# ARIMA(2,0,0): second-order autoregressive model, fitted by maximum likelihood.
fit <- arima(K.ts, order = c(2, 0, 0), method = "ML")
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2033 0.1967 0.2099 0.1931 0.2135
## 2016         0.2028 0.1932 0.2123 0.1882 0.2173
# ar2 = -0.046 (s.e. 0.159) is negative but within 1 SE of zero, and the AIC
# (-303.6) is worse than the AR(1) fit's (-305.5): the second lag adds nothing.
summary(fit)
## Series: K.ts 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##         ar1     ar2  intercept
##       1.032  -0.046      0.165
## s.e.  0.157   0.159      0.028
## 
## sigma^2 estimated as 2.68e-05:  log likelihood=155.8
## AIC=-303.6   AICc=-302.4   BIC=-296.7
## 
## Training set error measures:
##                    ME     RMSE      MAE    MPE  MAPE   MASE
## Training set 0.001436 0.005181 0.004194 0.7709 2.686 0.2352
# ARIMA(0,1,1): simple exponential smoothing.
fit <- arima(K.ts, order = c(0, 1, 1))
forecast(fit, h = 2)
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2038 0.1971 0.2104 0.1936 0.2139
## 2016         0.2038 0.1942 0.2133 0.1891 0.2184
plot(forecast(fit, h = 10))

plot of chunk unnamed-chunk-8

# Summary of the ARIMA(0,1,1) fit made just before this point.
# ma1 = 0.033 (s.e. 0.151) is well within 1 SE of zero, so this fit is
# effectively the plain random walk.
summary(fit)
## Series: K.ts 
## ARIMA(0,1,1)                    
## 
## Coefficients:
##         ma1
##       0.033
## s.e.  0.151
## 
## sigma^2 estimated as 2.69e-05:  log likelihood=153.7
## AIC=-303.4   AICc=-303   BIC=-300
## 
## Training set error measures:
##                    ME     RMSE      MAE    MPE  MAPE   MASE
## Training set 0.001721 0.005127 0.004063 0.9922 2.587 0.2278
# Theta method (equivalent to simple exponential smoothing with drift).
# Unlike ets()/arima(), thetaf() returns a forecast object, not a fitted
# model, and its horizon is fixed when thetaf() is called. Wrapping the result
# in forecast(fit, 2) therefore did not shorten it (all 10 rows were printed),
# so the horizon is stated here and the object is printed and plotted directly.
fit <- thetaf(K.ts, h = 10)
fit
##      Point Forecast  Lo 80  Hi 80  Lo 95  Hi 95
## 2015         0.2045 0.1979 0.2110 0.1944 0.2145
## 2016         0.2053 0.1960 0.2146 0.1911 0.2195
## 2017         0.2062 0.1948 0.2176 0.1888 0.2236
## 2018         0.2070 0.1939 0.2202 0.1869 0.2271
## 2019         0.2079 0.1932 0.2226 0.1854 0.2304
## 2020         0.2088 0.1927 0.2249 0.1841 0.2334
## 2021         0.2096 0.1922 0.2270 0.1830 0.2362
## 2022         0.2105 0.1919 0.2291 0.1820 0.2389
## 2023         0.2113 0.1916 0.2311 0.1812 0.2415
## 2024         0.2122 0.1914 0.2330 0.1804 0.2440
plot(fit)

plot of chunk unnamed-chunk-8

# Test on out-of-sample one-step forecasts.
# Both models are first fitted on observations 1-20. They are then passed back
# in through `model=` on observations 21-40, which (per the forecast package
# docs) re-applies the already-fitted model instead of selecting a new one, so
# the residuals on 21-40 act as one-step forecast errors.
# NOTE(review): K.ts is built with start=1974, end=2014 (41 annual values), so
# 21:40 leaves out the last observation -- confirm this is intended.
# NOTE(review): indexing with [ ] drops the ts attributes; window() would keep
# the year labels. The code is left as-is so the output below stays valid.
f1 <- ets(K.ts[1:20])
f2 <- auto.arima(K.ts[1:20])

summary(f2)
## Series: K.ts[1:20] 
## ARIMA(0,1,0)                    
## 
## sigma^2 estimated as 2.81e-05:  log likelihood=72.61
## AIC=-143.2   AICc=-143   BIC=-142.3
## 
## Training set error measures:
##                     ME     RMSE      MAE    MPE  MAPE   MASE
## Training set 0.0009915 0.005164 0.003762 0.6381 2.681 0.3951
f1.out <- ets(K.ts[21:40],model=f1)
f2.out <- Arima(K.ts[21:40],model=f2)

# accuracy() labels these rows "Training set" because it is given the re-applied
# model objects; the data behind them are observations 21-40.
accuracy(f1.out)
##                    ME     RMSE      MAE   MPE  MAPE   MASE
## Training set 0.002236 0.004872 0.004071 1.213 2.325 0.4721
accuracy(f2.out)
##                    ME    RMSE      MAE   MPE  MAPE   MASE
## Training set 0.001998 0.00477 0.003968 1.087 2.276 0.4601
# Diebold-Mariano test on the two residual series. With alternative="greater"
# the alternative hypothesis is that the second method (f2.out, ARIMA) is more
# accurate than the first (f1.out, ETS). The p-value below (0.00088) rejects
# equal accuracy, so the two are NOT indistinguishable, even though the RMSE
# gap (0.004872 vs 0.00477) is small.
dm.test(residuals(f1.out),residuals(f2.out),h=1, alternative="greater") # ARIMA significantly more accurate (p < 0.001)
## 
##  Diebold-Mariano Test
## 
## data:  residuals(f1.out)residuals(f2.out)
## DM = 3.636, Forecast horizon = 1, Loss function power = 2, p-value
## = 0.0008782
## alternative hypothesis: greater
# Refit both models on the first 40 observations.
# NOTE(review): f1/f2 are not used again in the code visible after this point
# -- confirm whether these refits are still needed.
f1 <- ets(K.ts[1:40])
f2 <- auto.arima(K.ts[1:40])
# xts copies of the annual series for the interactive charts. The two rate
# series are multiplied by 100; RA is converted as-is.
K <- as.xts(K.ts * 100)
colnames(K) <- "K"

HR <- as.xts(HR.ts * 100)
colnames(HR) <- "HR"

RA <- as.xts(R.ts)
colnames(RA) <- "RA"

# Two-column objects, one per chart that overlays K on another series.
KHR <- cbind(K, HR)
KR <- cbind(K, RA)

dygraph(RA)

# K and HR (the scaled HR.FB series) on one chart; K is drawn against the
# secondary (y2) axis. dyRoller starts with a roll period of 1.
dygraph(KHR, main = "K% Rolling Averages") %>%
  dySeries("K", axis = "y2") %>%
  dyRoller(rollPeriod = 1)

# K (secondary axis, labelled "K%") against RA (primary axis), with
# independent tick marks on the secondary axis and a wider legend.
dygraph(KR, main = "K% and RA by Season") %>%
  dyAxis("y", label = "RA") %>%
  dyAxis("y2", label = "K%", independentTicks = TRUE) %>%
  dySeries("K", axis = "y2") %>%
  dyLegend(width = 400) %>%
  dyRoller(rollPeriod = 1)

To do: compare the models further. Use runs per game and other stats. Use monthly data and remove seasonal effects.