library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(fpp2)
## Loading required package: fma
## Loading required package: expsmooth
payments_all <- read.csv('lambda_payments_clean.csv')
View(payments_all)
colnames(payments_all) [1] <- "date"
#create timeseries for payments
head(payments_all)
## date payments
## 1 Aug-17 2
## 2 Sep-17 2
## 3 Oct-17 2
## 4 Dec-17 7
## 5 Jan-18 7
## 6 Feb-18 7
ts_p <- ts(payments_all[2], start = c(2017, 8), frequency = 12)
#plot timeseries
autoplot(ts_p)

ggseasonplot(ts_p)

#lag plot and ACF
gglagplot(ts_p)

ggAcf(ts_p)

#plot naive forecast
fcn_p <- naive(ts_p, h= 12)
autoplot(fcn_p, series = "Data")+
autolayer(fitted(fcn_p), series = "Fitted")
## Warning: Removed 1 row(s) containing missing values (geom_path).

summary(fcn_p)
##
## Forecast method: Naive method
##
## Model Information:
## Call: naive(y = ts_p, h = 12)
##
## Residual sd: 12.2614
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5.666667 13.35207 5.666667 9.301885 9.301885 0.132771 0.8341991
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 2020 206 188.8886 223.1114 179.8304 232.1696
## Oct 2020 206 181.8009 230.1991 168.9906 243.0094
## Nov 2020 206 176.3622 235.6378 160.6730 251.3270
## Dec 2020 206 171.7773 240.2227 153.6608 258.3392
## Jan 2021 206 167.7378 244.2622 147.4830 264.5170
## Feb 2021 206 164.0859 247.9141 141.8979 270.1021
## Mar 2021 206 160.7276 251.2724 136.7618 275.2382
## Apr 2021 206 157.6017 254.3983 131.9813 280.0187
## May 2021 206 154.6659 257.3341 127.4913 284.5087
## Jun 2021 206 151.8891 260.1109 123.2445 288.7555
## Jul 2021 206 149.2480 262.7520 119.2053 292.7947
## Aug 2021 206 146.7245 265.2755 115.3459 296.6541
checkresiduals(fcn_p)

##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 51.223, df = 7, p-value = 8.305e-09
##
## Model df: 0. Total lags used: 7
#create test and training sets for the forecasts
training <- subset(ts_p, start = 25, end = 33)
test <- subset(ts_p, start = 34)
#forecast naive and mean with training set
fcn_tr <- naive(training, h = 6)
fcm_tr <- meanf(training, h =6)
#plot forecasts against test
autoplot(fcn_tr, series = "training")+
autolayer(test, series = "test")

autoplot(fcm_tr, series = "training")+
autolayer(test, series = "test")

#check accuracy of forecasts
accuracy(fcn_tr, ts_p)
## ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
## Training set 17.25 25.23391 17.25 28.80358 28.80358 NaN 0.7014695 NA
## Test set 47.00 49.82469 47.00 23.87008 23.87008 NaN 0.1663620 2.918558
accuracy(fcm_tr, ts_p)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.156426e-15 47.13574 39.77778 -188.94365 224.09280 NaN
## Test set 1.503333e+02 151.24024 150.33333 78.12359 78.12359 NaN
## ACF1 Theil's U
## Training set 0.5755762 NA
## Test set 0.1663620 8.36556
#forecasting using basic exponential smoothing
fcx_p <- ses(training, h = 6)
summary(fcx_p)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = training, h = 6)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 7.0243
##
## sigma: 26.9782
##
## AIC AICc BIC
## 82.82371 87.62371 83.41538
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 15.33168 23.79253 15.33708 25.56601 25.64309 NaN 0.7089655
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2020 144.9956 110.42166 179.5695 92.11932 197.8719
## Jun 2020 144.9956 96.10311 193.8881 70.22099 219.7702
## Jul 2020 144.9956 85.11577 204.8754 53.41731 236.5739
## Aug 2020 144.9956 75.85291 214.1383 39.25098 250.7402
## Sep 2020 144.9956 67.69211 222.2991 26.77011 263.2211
## Oct 2020 144.9956 60.31415 229.6770 15.48650 274.5047
autoplot(fcx_p)

checkresiduals(fcx_p) #notice how p-value has increased from previous forecasting methods so we are using more of the relevent data to build the forecast

##
## Ljung-Box test
##
## data: Residuals from Simple exponential smoothing
## Q* = 12.043, df = 3, p-value = 0.007236
##
## Model df: 2. Total lags used: 5
#holt forecast
fch_p <- holt(ts_p, h = 6, damped = TRUE)
summary(fch_p)
##
## Forecast method: Damped Holt's method
##
## Model Information:
## Damped Holt's method
##
## Call:
## holt(y = ts_p, h = 6, damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.9999
## phi = 0.8684
##
## Initial states:
## l = 2.0472
## b = 0.1231
##
## sigma: 7.0265
##
## AIC AICc BIC
## 284.5087 287.3087 294.1742
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7461751 6.534467 3.188133 3.009567 8.963955 0.07469852 0.2032704
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 2020 206.8698 197.8650 215.8745 193.0982 220.6414
## Oct 2020 207.6238 188.5433 226.7044 178.4426 236.8051
## Nov 2020 208.2787 177.9212 238.6361 161.8509 254.7064
## Dec 2020 208.8473 166.5121 251.1825 144.1012 273.5934
## Jan 2021 209.3411 154.6481 264.0342 125.6953 292.9869
## Feb 2021 209.7699 142.5533 276.9865 106.9710 312.5688
autoplot(fch_p)

checkresiduals(fch_p) #p-value still less than 0.05 so not using all relevent data

##
## Ljung-Box test
##
## data: Residuals from Damped Holt's method
## Q* = 8.1707, df = 3, p-value = 0.04261
##
## Model df: 5. Total lags used: 8
#finding the best exponential forecasting model
ets_p <- ets(training)
summary(ets_p)
## ETS(M,N,N)
##
## Call:
## ets(y = training)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 5.2537
##
## sigma: 0.7551
##
## AIC AICc BIC
## 67.73780 72.53780 68.32947
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 15.52844 23.79965 15.52844 28.37675 28.37675 NaN 0.7142775
autoplot(forecast(ets_p, h = 6)) #does not work for this timeseries

checkresiduals(ets_p)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,N)
## Q* = 9.3721, df = 3, p-value = 0.02473
##
## Model df: 2. Total lags used: 5
#fit automatic ARIMA model
fca_p <- auto.arima(ts_p)
summary(fca_p)
## Series: ts_p
## ARIMA(0,2,0)
##
## sigma^2 estimated as 48.31: log likelihood=-117.52
## AIC=237.05 AICc=237.17 BIC=238.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02697868 6.760377 3.108205 2.267243 8.806969 0.07282579
## ACF1
## Training set 0.1683342
fca_p %>% forecast(h = 6) %>% autoplot()

checkresiduals(fca_p) #residuals greater than .05 yay!

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,0)
## Q* = 7.5824, df = 7, p-value = 0.3708
##
## Model df: 0. Total lags used: 7
#omit stepwise search
fca2_p <- auto.arima(ts_p, stepwise = FALSE)
summary(fca2_p)
## Series: ts_p
## ARIMA(0,2,0)
##
## sigma^2 estimated as 48.31: log likelihood=-117.52
## AIC=237.05 AICc=237.17 BIC=238.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02697868 6.760377 3.108205 2.267243 8.806969 0.07282579
## ACF1
## Training set 0.1683342
fca2_p %>% forecast(h = 6) %>% autoplot()

checkresiduals(fca2_p) #same ARIMA model chosen (0,2,0)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,0)
## Q* = 7.5824, df = 7, p-value = 0.3708
##
## Model df: 0. Total lags used: 7
#investigate regression trend between payments and employment
payments_employ <- read.csv('lambda_payment vs employment.csv')
colnames(payments_employ) [1] <- "date"
head(payments_employ)
## date payments employ
## 1 Aug-17 2 2
## 2 Sep-17 2 3
## 3 Oct-17 2 3
## 4 Nov-17 2 10
## 5 Dec-17 7 10
## 6 Jan-18 7 10
ggplot(data = payments_employ)+
geom_point(aes(x= payments, y=employ))

pay_employ.lm = lm(employ ~ payments, data = payments_employ)
coeffs = coefficients(pay_employ.lm); coeffs
## (Intercept) payments
## 24.490013 4.205591
summary(pay_employ.lm) #clear correlation between employ and payments
##
## Call:
## lm(formula = employ ~ payments, data = payments_employ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.96 -41.93 -26.43 17.58 174.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.4900 11.7078 2.092 0.044 *
## payments 4.2056 0.2213 19.004 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.16 on 34 degrees of freedom
## Multiple R-squared: 0.914, Adjusted R-squared: 0.9114
## F-statistic: 361.1 on 1 and 34 DF, p-value: < 2.2e-16
#create new timeseries object with payment and employ data
ts_ep <- ts(payments_employ[2:3], start = c(2017, 8), frequency = 12)
autoplot(ts_ep, facets = TRUE)

#create training and test sets
head(ts_ep)
## payments employ
## Aug 2017 2 2
## Sep 2017 2 3
## Oct 2017 2 3
## Nov 2017 2 10
## Dec 2017 7 10
## Jan 2018 7 10
train_ep <- subset(ts_ep, end = 33)
test_ep <- subset(ts_ep, start = 34)
#create dynamic regression forecast
fit <- auto.arima(ts_ep[,"payments"], xreg = ts_ep[,"employ"])
summary(fit)
## Series: ts_ep[, "payments"]
## Regression with ARIMA(1,1,0) errors
##
## Coefficients:
## ar1 xreg
## 0.9097 -0.0044
## s.e. 0.0713 0.0411
##
## sigma^2 estimated as 40.59: log likelihood=-114.32
## AIC=234.64 AICc=235.42 BIC=239.31
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.16738 6.099588 2.634499 3.403425 8.84798 0.09367107 0.1990126
checkresiduals(fit) #p-value greater than 0.5 yay!

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,0) errors
## Q* = 4.8954, df = 5, p-value = 0.4288
##
## Model df: 2. Total lags used: 7
fcdr_ep <- forecast(fit, xreg = rep(25, 12))
autoplot(fcdr_ep)+
xlab("Year")+
ylab("Number of Payments")+
ggtitle("Forecasts of Lambda Payments from Regression with ARIMA(1,1,0) errors")

summary(fcdr_ep)
##
## Forecast method: Regression with ARIMA(1,1,0) errors
##
## Model Information:
## Series: ts_ep[, "payments"]
## Regression with ARIMA(1,1,0) errors
##
## Coefficients:
## ar1 xreg
## 0.9097 -0.0044
## s.e. 0.0713 0.0411
##
## sigma^2 estimated as 40.59: log likelihood=-114.32
## AIC=234.64 AICc=235.42 BIC=239.31
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.16738 6.099588 2.634499 3.403425 8.84798 0.09367107 0.1990126
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Aug 2020 219.7621 211.5976 227.9266 207.2755 232.2486
## Sep 2020 242.1127 224.5127 259.7127 215.1958 269.0296
## Oct 2020 262.4448 233.9984 290.8912 218.9398 305.9498
## Nov 2020 280.9407 240.6775 321.2039 219.3634 342.5179
## Dec 2020 297.7661 245.0125 350.5198 217.0864 378.4458
## Jan 2021 313.0721 247.3654 378.7787 212.5824 413.5617
## Feb 2021 326.9957 248.0294 405.9620 206.2272 447.7642
## Mar 2021 339.6618 247.2476 432.0761 198.3264 480.9973
## Apr 2021 351.1841 245.2246 457.1436 189.1330 513.2351
## May 2021 361.6657 242.1348 481.1967 178.8589 544.4726
## Jun 2021 371.2008 238.1275 504.2741 167.6827 574.7188
## Jul 2021 379.8747 233.3318 526.4176 155.7566 603.9927
#compare to regular ARIMA model
reg <- auto.arima(ts_ep[,"payments"])
fca_ep <- forecast(reg, h = 12)
checkresiduals(reg)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,0)
## Q* = 6.9766, df = 7, p-value = 0.4313
##
## Model df: 0. Total lags used: 7
summary(reg)
## Series: ts_ep[, "payments"]
## ARIMA(0,2,0)
##
## sigma^2 estimated as 39.74: log likelihood=-110.84
## AIC=223.68 AICc=223.81 BIC=225.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7499503 6.125992 2.472322 2.681737 8.700093 0.08790477 0.1498705
autoplot(fca_ep)+
xlab("Year")+
ylab("Number of Payments")+
ggtitle("Forecasts of Lambda Payments from ARIMA(0,2,0) errors")

summary(fca_ep)
##
## Forecast method: ARIMA(0,2,0)
##
## Model Information:
## Series: ts_ep[, "payments"]
## ARIMA(0,2,0)
##
## sigma^2 estimated as 39.74: log likelihood=-110.84
## AIC=223.68 AICc=223.81 BIC=225.21
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7499503 6.125992 2.472322 2.681737 8.700093 0.08790477 0.1498705
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Aug 2020 219 210.9216 227.0784 206.6452 231.3548
## Sep 2020 246 227.9362 264.0638 218.3738 273.6262
## Oct 2020 273 242.7735 303.2265 226.7725 319.2275
## Nov 2020 300 255.7529 344.2471 232.3299 367.6701
## Dec 2020 327 267.0891 386.9109 235.3742 418.6258
## Jan 2021 354 276.9372 431.0628 236.1426 471.8574
## Feb 2021 381 285.4153 476.5847 234.8158 527.1842
## Mar 2021 408 292.6176 523.3824 231.5379 584.4621
## Apr 2021 435 298.6212 571.3788 226.4267 643.5733
## May 2021 462 303.4907 620.5093 219.5810 704.4190
## Jun 2021 489 307.2813 670.7187 211.0853 766.9147
## Jul 2021 516 310.0409 721.9591 201.0127 830.9873