6.9 Exercise #7

writing.series <- get("writing")

# plot data
plot.ts(writing.series)

# use STL decomposition to calculate the trend-cycle and seasonal indices
mstl(writing.series) %>%
  autoplot()

# seems like an exponential relationship exists (the magnitude of the seasonality is non-constant). Therefore, a Box-Cox transformation is necessary. We will also forecast with drift since naive is just basically the same value with the previous lag and cannot capture the seasonality in data. 

plot(stlf(writing.series, method="rwdrift", lambda = T))

6.9 Exercise #8

fancy.series <- get("fancy")

# plot data
plot.ts(fancy.series)

# use STL decomposition to calculate the trend-cycle and seasonal indices
mstl(fancy.series) %>%
  autoplot()

# seems like an exponential relationship exists (the magnitude of the seasonality is non-constant). Therefore, a Box-Cox transformation is necessary. We will also forecast with drift since naive is just basically the same value with the previous lag and cannot capture the seasonality in data. 
plot(stlf(fancy.series, method="rwdrift", lambda = T))

7.8 Exercise #13

#prepare data 
ausbeer.series <- get("ausbeer")
bricksq.series <- get("bricksq")
dole.series <- get("dole")
a10.series <- get("a10")
h02.series <- get("h02")
usmelec.series <- get("usmelec")

# train and test 
ausbeer.train=ts(ausbeer.series, start = 1956, end = 2006, frequency = 4)
ausbeer.test=ts(ausbeer.series, start = 2007, end = 2010, frequency = 4)

bricksq.train=ts(bricksq.series, start = 1956, end = 1990, frequency = 4)
bricksq.test=ts(bricksq.series, start = 1991, end = 1994, frequency = 4)

dole.train=ts(dole.series, start = 1956, end = 1988, frequency = 12)
dole.test=ts(dole.series, start = 1989, end = 1992, frequency = 12)

a10.train=ts(a10.series, start = 1991, end = 2004, frequency = 12)
a10.test=ts(a10.series, start = 2005, end = 2008, frequency = 12)

h02.train=ts(h02.series, start = 1991, end = 2004, frequency = 12)
h02.test=ts(h02.series, start = 2005, end = 2008, frequency = 12)

usmelec.train=ts(usmelec.series, start = 1973, end = 2009, frequency = 12)
usmelec.test=ts(usmelec.series, start = 2009, end = 2013, frequency = 12)

# Build a function to run ets(), snaive() and stlf() and return the accuracy

compare <- function(traindata, testdata, hforecast = 12, lambdaNo = F) {
  # ETS 
  a1 <- traindata %>% ets(model="ZZZ") %>% forecast(h = hforecast) %>%
    accuracy(testdata)
  
  print(a1[,c("RMSE","MAE","MAPE","MASE")])
  
  # snaive
  a2 <- snaive(traindata, h = hforecast)  %>%
    accuracy(testdata)
  
  print(a2[,c("RMSE","MAE","MAPE","MASE")])
  
  # stlf()
  a3<- stlf(traindata, h = hforecast, lambda = lambdaNo) %>%
    accuracy(testdata)
  
  print(a3[,c("RMSE","MAE","MAPE","MASE")])
  
}

# ausbeer
plot.ts(ausbeer.train)

compare(ausbeer.train, ausbeer.test, hforecast = 14)

##                  RMSE       MAE      MAPE      MASE
## Training set  15.9596  12.20538  2.921393  0.762111
## Test set     165.7248 165.40930 65.755363 10.328251
##                   RMSE       MAE      MAPE    MASE
## Training set  19.85314  16.01523  3.814617  1.0000
## Test set     172.14661 171.90909 68.475947 10.7341
##                   RMSE       MAE      MAPE       MASE
## Training set  13.74698  10.60279  2.539117  0.6620441
## Test set     167.62370 167.23776 66.420164 10.4424210

# => ets got the best result on Test set 

# bricksq
plot.ts(bricksq.train)

compare(bricksq.train, bricksq.test, hforecast = 14, lambdaNo = T)

##                   RMSE       MAE       MAPE      MASE
## Training set  21.53635  15.34056   3.814251 0.4372684
## Test set     309.73410 308.97589 146.924088 8.8070711
##                   RMSE       MAE       MAPE     MASE
## Training set  48.75811  35.08271   8.638005 1.000000
## Test set     325.97602 324.36364 153.975118 9.245684
##                  RMSE       MAE       MAPE      MASE
## Training set  19.6667  13.95614   3.458193 0.3978068
## Test set     309.5347 308.74632 146.823636 8.8005272

# => stlf got the best result on Test set

# dole
plot.ts(dole.train)

compare(dole.train, dole.test, hforecast = 38, lambdaNo = T)

##                  RMSE        MAE        MAPE       MASE
## Training set  16565.6   9323.031    6.289154  0.3201815
## Test set     440880.2 439414.676 4031.172512 15.0908512
##                   RMSE       MAE       MAPE     MASE
## Training set  54891.12  29117.95   28.19218  1.00000
## Test set     530775.02 528899.22 4808.59064 18.16403
##                    RMSE        MAE       MAPE       MASE
## Training set   6406.613   3879.101    5.16386  0.1332202
## Test set     379993.734 378980.688 3469.93952 13.0153622

# => ets got the best result on Test set 

# a10.train
plot.ts(a10.train)

compare(a10.train, a10.test, hforecast = 38, lambdaNo = T)

##                    RMSE        MAE       MAPE       MASE
## Training set  0.4726965  0.3353551   3.996674  0.3698757
## Test set     13.8274319 13.7075375 355.419516 15.1185578
##                   RMSE        MAE      MAPE     MASE
## Training set  1.052731  0.9066696  11.11561  1.00000
## Test set     10.254965 10.1843937 267.80801 11.23275
##                    RMSE        MAE       MAPE       MASE
## Training set  0.4617717  0.3425413   4.320592  0.3778017
## Test set     13.0889944 13.0687409 342.914258 14.4140051

# => snaive got the best result on Test set 

# h02.train
plot.ts(h02.train)

compare(h02.train, h02.test, hforecast = 38, lambdaNo = T)

##                    RMSE        MAE       MAPE      MASE
## Training set 0.04410859 0.03256959   4.397991 0.5564493
## Test set     0.49887462 0.48749438 100.927086 8.3288092
##                    RMSE       MAE      MAPE     MASE
## Training set 0.07058749 0.0585311  8.171465 1.000000
## Test set     0.47062025 0.4592449 95.960370 7.846169
##                    RMSE        MAE       MAPE     MASE
## Training set 0.04019418 0.03032672   4.375312 0.518130
## Test set     0.56174084 0.55767827 117.103341 9.527896

# => snaive got the best result on Test set 

# usmelec.train
plot.ts(usmelec.train)

compare(usmelec.train, usmelec.test, hforecast = 38, lambdaNo = T)

##                    RMSE        MAE       MAPE       MASE
## Training set   7.231095   5.444088   2.140116  0.6204376
## Test set     195.353477 194.022361 122.780708 22.1118338
##                   RMSE        MAE       MAPE     MASE
## Training set  11.32383   8.774594   3.475778  1.00000
## Test set     185.10317 183.919158 116.547828 20.96042
##                    RMSE        MAE       MAPE      MASE
## Training set   6.198652   4.713299   1.869173  0.537153
## Test set     193.984058 192.891050 122.132056 21.982904

# => snaive got the best result on Test set

8.11 Exercise #18

#a. Select a time series from Datamarket. Then copy its short URL and import the data using
iceland.unemployed <- ts(rdatamarket::dmseries("https://datamarket.com/data/set/ybz/number-of-unemployed#!ds=ybz!2m5e=1&display=line")[,1],
        start=2001, frequency=12)

#b. Plot graphs of the data, and try to identify an appropriate ARIMA model.
plot.ts(iceland.unemployed)

# Using R's default ADF test we get: detrend data
adf.test(iceland.unemployed
         , alternative = "stationary"
         , k = 12
)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  iceland.unemployed
## Dickey-Fuller = -3.1098, Lag order = 12, p-value = 0.1128
## alternative hypothesis: stationary

# First look at ACF/PACF
ggAcf(iceland.unemployed)

ggPacf(iceland.unemployed)

# Build ARIMA
# first difference
diff1 = diff(iceland.unemployed)

# plot data
plot.ts(diff1)

ggAcf(diff1)

ggPacf(diff1)

fit.arima <- Arima(iceland.unemployed, order=c(1,1,0), seasonal=c(1,1,1))
summary(fit.arima)

## Series: iceland.unemployed 
## ARIMA(1,1,0)(1,1,1)[12] 
## 
## Coefficients:
##          ar1    sar1     sma1
##       0.7794  0.2162  -0.9822
## s.e.  0.0492  0.1046   0.5604
## 
## sigma^2 estimated as 79819:  log likelihood=-1141.14
## AIC=2290.29   AICc=2290.55   BIC=2302.59
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE       MASE
## Training set -4.872463 269.1403 171.2051 0.2226207 3.683212 0.08736699
##                      ACF1
## Training set -0.009152117

tsdisplay(residuals(fit.arima))

#c. Do residual diagnostic checking of your ARIMA model. Are the residuals white noise? No, they do not look white noise.
#check residual
ggAcf(fit.arima$residuals)

ggPacf(fit.arima$residuals)

checkresiduals(fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)(1,1,1)[12]
## Q* = 40.514, df = 21, p-value = 0.006436
## 
## Model df: 3.   Total lags used: 24

#d. Use your chosen ARIMA model to forecast the next four years.
arima.forecast <- forecast(fit.arima, h = 48)
plot(arima.forecast)

#e. Now try to identify an appropriate ETS model.

fit.ets <- ets(iceland.unemployed, model="AAN", alpha=NULL, beta=NULL,
    gamma=NULL, phi=NULL, lambda=NULL, biasadj=FALSE,
    additive.only=FALSE, restrict=TRUE,
    allow.multiplicative.trend=FALSE)

#f. Do residual diagnostic checking of your ETS model. Are the residuals white noise? No the residuals do not look white noise. There are several outliers.

checkresiduals(fit.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,Ad,N)
## Q* = 135.8, df = 19, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 24

#g. Use your chosen ETS model to forecast the next four years.

ets.forecast <- forecast(fit.ets, h = 48)
plot(ets.forecast)

#h. Which of the two models do you prefer? ACF and PACF and residual plots of ARIMA look better. I would go with Arima.

HW 1

Nam Nguyen

7/19/2018

6.9 Exercise #7

6.9 Exercise #8

7.8 Exercise #13

8.11 Exercise #18