# Prepare needed packages
packages <- c("psych", "tidyr", "ggplot2",
                "caret", "plyr", "car", "fpp",
                "forecast","ResourceSelection",
                "ggpubr", "scales", "stringr",
                "gridExtra", "pROC", "reshape2", 
                "corrplot","glmnet", "dplyr",
                "knitr", "purrr", "readxl", "seasonal", 
                "fpp2", "Metrics", "urca", "Quandl", "timeSeries")
  for (i in 1:length(packages)) {
    if (!packages[i] %in% rownames(installed.packages())) {
      install.packages(packages[i])
    }
    library(packages[i], character.only = TRUE) # Loads package in
  }

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Loading required package: lattice

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## Loading required package: forecast

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Loading required package: fma

## 
## Attaching package: 'fma'

## The following object is masked from 'package:plyr':
## 
##     ozone

## Loading required package: expsmooth

## Loading required package: lmtest

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: tseries

## ResourceSelection 0.3-5   2019-07-22

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:forecast':
## 
##     gghistogram

## The following object is masked from 'package:plyr':
## 
##     mutate

## 
## Attaching package: 'scales'

## The following objects are masked from 'package:psych':
## 
##     alpha, rescale

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

## corrplot 0.84 loaded

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-1

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## 
## Attaching package: 'purrr'

## The following object is masked from 'package:scales':
## 
##     discard

## The following object is masked from 'package:car':
## 
##     some

## The following object is masked from 'package:plyr':
## 
##     compact

## The following object is masked from 'package:caret':
## 
##     lift

## 
## Attaching package: 'seasonal'

## The following object is masked from 'package:psych':
## 
##     outlier

## 
## Attaching package: 'fpp2'

## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec

## 
## Attaching package: 'Metrics'

## The following object is masked from 'package:pROC':
## 
##     auc

## The following object is masked from 'package:forecast':
## 
##     accuracy

## The following objects are masked from 'package:caret':
## 
##     precision, recall

## Loading required package: xts

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## Loading required package: timeDate

## 
## Attaching package: 'timeSeries'

## The following objects are masked from 'package:seasonal':
## 
##     outlier, series

## The following object is masked from 'package:zoo':
## 
##     time<-

## The following object is masked from 'package:psych':
## 
##     outlier

# Remove unused objects
rm(packages)
rm(i)

# Set work directory
setwd("~/Desktop/Predictive:Forecasting/Week4/HW")

Chapter 7

Question 1

#(a)
head(pigs, 20)

##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 1980  76378  71947  33873  96428 105084  95741 110647 100331  94133 103055
## 1981  76889  81291  91643  96228 102736 100264 103491  97027              
##         Nov    Dec
## 1980  90595 101457
## 1981

pigs.data <- window(pigs, start=1980)
fcast <- ses(pigs.data, h = 4)

fcast

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8

#(b)
s <- sd(fcast$residuals)
fcast_mean <- fcast$mean[1]

up <- fcast_mean + 1.96*s
low <- fcast_mean - 1.96*s

print(paste("95% prediction interval is ", round(low,2), round(up,2)))

## [1] "95% prediction interval is  78679.97 118952.84"

## The predictions are very similar compared to two intervals.

Question 2

ses.func <- function (y, alpha, level, h){
  yhat <- level 
  for(i in 1:length(y)+h){
  if(i <= length(y)){
   yhat[i] <- alpha*y[i] +(1-alpha)*yhat[i-1]
  }
  else{
   yhat[i] <- alpha*yhat[i-1]+(1-alpha)*yhat[i-2]
  }
 }
 return(yhat[length(y):length(y)+h])
}

a <- fcast$model$par[1]
print(paste("forecasts at h = 1", ses.func(pigs, alpha = a, level=1, h=1)))

## [1] "forecasts at h = 1 98314.3452587773"

fcast$mean

##           Sep      Oct      Nov      Dec
## 1995 98816.41 98816.41 98816.41 98816.41

Question 3

ses.func2 <- function(pars = c(alpha, l), y) {
 error <- 0
 SSE <- 0
 alpha <- pars[1]
 l0 <- pars[2]
 y_hat <- l0
 
 for(index in 1:length(y)){
  error <- y[index] - y_hat
  SSE <- SSE + error^2
  
  y_hat <- alpha*y[index] + (1 - alpha)*y_hat 
 }
 
 return(SSE)
}

gpo <-optim(par = c(0.5, pigs[1]), y = pigs, fn = ses.func2)
print(paste("alpha is ",gpo$par[1], "l0 is ", gpo$par[2]))

## [1] "alpha is  0.299008094014243 l0 is  76379.2653476235"

# values from ses() function
print(paste("alpha is ", fcast$model$par[1], "l0 is ", fcast$model$par[2]))

## [1] "alpha is  0.297148833372095 l0 is  77260.0561458528"

## The results are not the same but very similar.

Question 4

ses.func3 <- function(y){
  gpo <-optim(par = c(0.5, pigs[1]), y = pigs, fn = ses.func2)
  a <- fcast$model$par[1]
  l <- fcast$model$par[2]
  pred <- ses.func(y, a, l, 1)
  my_list <- list(a, l, pred)
  return(my_list)
}
ses.func3(pigs)

## [[1]]
##     alpha 
## 0.2971488 
## 
## [[2]]
##        l 
## 77260.06 
## 
## [[3]]
##          
## 98314.35

print(paste("alpha is ", ses.func3(pigs)[1], "l0 is ", ses.func3(pigs)[2],"Forecast is ", ses.func3(pigs)[3]))

## [1] "alpha is  c(alpha = 0.297148833372095) l0 is  c(l = 77260.0561458528) Forecast is  c(98314.3452587773)"

Question 5

# (a)
head(books)

## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##   Paperback Hardcover
## 1       199       139
## 2       172       128
## 3       111       172
## 4       209       139
## 5       161       191
## 6       119       168

summary(books)

##    Paperback       Hardcover    
##  Min.   :111.0   Min.   :128.0  
##  1st Qu.:167.2   1st Qu.:170.5  
##  Median :189.0   Median :200.5  
##  Mean   :186.4   Mean   :198.8  
##  3rd Qu.:207.2   3rd Qu.:222.0  
##  Max.   :247.0   Max.   :283.0

autoplot(books)

##Both trends show an increasing patterns as the time increases. It is difficult to see if there is any seasonality. 

# (b)
autoplot(ses(books[,1], h = 4))

autoplot(ses(books[,2], h = 4))

# (c)
#Paperback's rmse
rmse(books[,1], ses(books[,1])$fitted)

## [1] 33.63769

#Hardcover's rmse
rmse(books[,2], ses(books[,2])$fitted)

## [1] 31.93101

Question 6

# (a)
fc1 <- holt(books[,1], h=4)
fc1

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       209.4668 166.6035 252.3301 143.9130 275.0205
## 32       210.7177 167.8544 253.5811 145.1640 276.2715
## 33       211.9687 169.1054 254.8320 146.4149 277.5225
## 34       213.2197 170.3564 256.0830 147.6659 278.7735

fc2 <- holt(books[,2], h =4)
fc2

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334

# (b)
# Paperback 
rmse(books[,1], holt(books[,1], h=4)$fitted)

## [1] 31.13692

# Hardcover
rmse(books[,2], holt(books[,2], h=4)$fitted)

## [1] 27.19358

## Because holt's method has an additional estimate of the slope of the series. Therefore, 
## it is more suitable to capture the trend; hence, better predictions and less rmse error. 

# (c)
# Paperback
ses.1 <- ses(books[,1],h = 4)
# Hardcover
ses.2 <- ses(books[,2],h = 4)
# Paperback
holt.1 <- holt(books[,1], h = 4)
# Hardcover
holt.2 <- holt(books[,2], h = 4)

autoplot(ses.1) + autolayer(fitted(ses.1), series = "SES") + autolayer(fitted(holt.1), series = "HOLT") + labs(title = "Paperback")

autoplot(ses.2) + autolayer(fitted(ses.2), series = "SES") + autolayer(fitted(holt.2), series = "HOLT") + labs(title = "Hardcover")

## While ses() has a larger confidence interval, it looks that holt() function is more appropriate. 

# (d)

Rmse.1 <- rmse(books[,1],ses.1$fitted)
Rmse.2 <- rmse(books[,1],holt.1$fitted)
Rmse.3 <- rmse(books[,2],ses.2$fitted)
Rmse.4 <- rmse(books[,2],holt.2$fitted)

# Confidence interval 
# for paperback
print(paste("Upper is", round(ses.1$mean[1]+1.96*Rmse.1,2), "Lower is", 
            round(ses.1$mean[1]-1.96*Rmse.1,2)))

## [1] "Upper is 273.04 Lower is 141.18"

print(paste("Upper is", round(holt.1$mean[1]+1.96*Rmse.2,2), "Lower is", 
            round(holt.1$mean[1]-1.96*Rmse.2,2)))

## [1] "Upper is 270.5 Lower is 148.44"

# for hardcover
print(paste("Upper is", round(ses.2$mean[1]+1.96*Rmse.3,2), "Lower is", 
            round(ses.2$mean[1]-1.96*Rmse.3,2)))

## [1] "Upper is 302.14 Lower is 176.98"

print(paste("Upper is", round(holt.2$mean[1]+1.96*Rmse.4,2), "Lower is", 
            round(holt.2$mean[1]-1.96*Rmse.4,2)))

## [1] "Upper is 303.47 Lower is 196.87"

# Confidence interval using ses() and holt()
# for paperback ses() method 
print(paste("Upper is", round(ses.1$upper[1, "95%"],2), "Lower is", 
            round(ses.1$lower[1,"95%"],2)))

## [1] "Upper is 275.35 Lower is 138.87"

# for paperback holt() method 
print(paste("Upper is", round(holt.1$upper[1, "95%"],2), "Lower is", 
            round(holt.1$lower[1,"95%"],2)))

## [1] "Upper is 275.02 Lower is 143.91"

# for hardcover ses() method 
print(paste("Upper is", round(ses.2$upper[1, "95%"],2), "Lower is", 
            round(ses.2$lower[1,"95%"],2)))

## [1] "Upper is 304.34 Lower is 174.78"

# for hardcover holt() method 
print(paste("Upper is", round(holt.2$upper[1, "95%"],2), "Lower is", 
            round(holt.2$lower[1,"95%"],2)))

## [1] "Upper is 307.43 Lower is 192.92"

## The results produced by ses() and holt() are a bit smaller than the intervals calculated
## using formula.

Question 7

head(eggs)

## Time Series:
## Start = 1900 
## End = 1905 
## Frequency = 1 
## [1] 276.79 315.42 314.87 321.25 314.54 317.92

m1 <- holt(eggs, h=100)
m2 <- holt(eggs, h=100, damped = TRUE)
m3 <- holt(eggs, h=100, exponential= TRUE)
m4 <- holt(eggs, h=100, lambda = "auto", biasadj = T)
m5 <- holt(eggs, h=100, damped = TRUE, lambda = "auto", biasadj = TRUE)

autoplot(eggs)+ autolayer(m1, series = "Default", PI = F)+ 
  autolayer(m2, series= "Damped", PI =F) + autolayer(m3, series= "Exponential", PI =F)+
  autolayer(m4, series= "Lambda+biasadj", PI =F)+ autolayer(m5, series= "Damped+Box-cox", PI =F)

print(paste("Model 1", round(rmse(eggs, m1$fitted),2), "Model 2", round(rmse(eggs, m2$fitted),2),
      "Model 3", round(rmse(eggs, m3$fitted),2), "Model 4", round(rmse(eggs, m4$fitted),2), 
      "Model 5", round(rmse(eggs, m5$fitted),2)))

## [1] "Model 1 26.58 Model 2 26.54 Model 3 26.5 Model 4 26.39 Model 5 26.59"

## Based on the rmse metric, Model 4 with lambda & biasadj has the best rmse of all other models.

Question 8

# (a)
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,"A3349873A"],
  frequency=12, start=c(1982,4))

autoplot(myts)

## From the autoplot, it shows that seasonal variation is proportional to the level of the time series. Therefore, it is needed to use multiplicative seasonality. 

# (b)
fit1 <- hw(myts,seasonal="multiplicative", damped = FALSE)
fit2 <- hw(myts,seasonal="multiplicative", damped = TRUE)

autoplot(myts)+
  autolayer(fit1, series="Without Damped")+autolayer(fit2, series="With Damped")

# (c)
print(paste("Without damped", round(rmse(myts, fit1$fitted),4), "With damped", round(rmse(myts, fit2$fitted),4)))

## [1] "Without damped 13.2938 With damped 13.3049"

## Based on the metric, I prefer the method with not damped because it has the lowest rmse. 

# (d)
checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 40.405, df = 8, p-value = 2.692e-06
## 
## Model df: 16.   Total lags used: 24

## The residuals plot did not show a signal with equal intensities at every frequency; therefore, it did not look like white noise. 

# (e)
myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)

fc <- snaive(myts.train)
rmse(myts.test, fc$mean)

## [1] 71.44309

hw.no <- hw(myts.train,seasonal="multiplicative", damped = FALSE)
rmse(myts.test, hw.no$mean)

## [1] 70.11659

print(paste("Seasonal Naive ", round(rmse(myts.test, fc$mean),4), "Holt without damped ", round(rmse(myts.test, hw.no$mean),4)))

## [1] "Seasonal Naive  71.4431 Holt without damped  70.1166"

## Looking at the rmse metric, I was able to beat the seasonal naive approach by using the holt's method without damped.

Question 9

train <- ts(as.vector(myts), start=c(1982,4), end=c(2010,12), frequency = 12) #######fixxxx
lam <- BoxCox.lambda(train)
box.train <- BoxCox(train, lam)
stl.m <- stl(box.train, s.window='periodic', robust=T)

autoplot(stl.m) +
  ggtitle('STL Decomposition')

box.train.adj <- train - stl.m$time.series[,'seasonal']

autoplot(train, series='Unadjusted Data') +
  autolayer(box.train.adj, series='Seasonally Adjusted') +
  ylab('')

ets.m <- ets(box.train.adj)
summary(ets.m)

## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = box.train.adj) 
## 
##   Smoothing parameters:
##     alpha = 0.6374 
##     beta  = 0.0042 
##     gamma = 1e-04 
##     phi   = 0.9794 
## 
##   Initial states:
##     l = 62.7778 
##     b = 0.8092 
##     s = 0.9416 0.9061 0.9359 1.5071 1.087 1.0113
##            0.9656 0.9447 0.9228 0.8963 0.9622 0.9194
## 
##   sigma:  0.0418
## 
##      AIC     AICc      BIC 
## 3439.414 3441.512 3508.598 
## 
## Training set error measures:
##                   ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 0.48757 8.792609 6.292699 0.262504 3.121933 0.3943595 0.01376926

fc.ets <- forecast(ets.m)$mean
rmse(myts.test, fc.ets)

## [1] 80.57372

## The ets model has a much high rmse than my previous models, which will not be prefer as a better model

Question 10

# (a)
head(ukcars)

##         Qtr1    Qtr2    Qtr3    Qtr4
## 1977 330.371 371.051 270.670 343.880
## 1978 358.491 362.822

autoplot(ukcars)

## The plot shows an increasing trend; however, there is an large dent around 2001. Visually speaking, there is seasonality with a frequency of one year. 

# (b)
stl.cars <- ukcars %>%
  stl(t.window=13, s.window="periodic", robust=TRUE) 

ukcars.adj <- seasadj(stl.cars)
# (c)
stlf.dam <- ukcars.adj %>% stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(stlf.dam)

# (d)
holt.dam <- ukcars.adj %>% stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(holt.dam)

# (e)
ets.cars <- ets(ukcars)
ets.cars

## ETS(A,N,A) 
## 
## Call:
##  ets(y = ukcars) 
## 
##   Smoothing parameters:
##     alpha = 0.6199 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 314.2568 
##     s = -1.7579 -44.9601 21.1956 25.5223
## 
##   sigma:  25.9302
## 
##      AIC     AICc      BIC 
## 1277.752 1278.819 1296.844

ets.cars.fc <- forecast(ets.cars, h = 8)
## ETS(A,N,A) 

# (f)
rmse(ets.cars$fitted,ukcars.adj)

## [1] 37.14855

rmse(holt.dam$fitted,ukcars.adj)

## [1] 22.86408

rmse(stlf.dam$fitted,ukcars.adj)

## [1] 22.81499

## From rmse metric, the additive damped trend method is the better fit than holt's linear and ets. 

# (g)
autoplot(ukcars.adj)+
  autolayer(stlf.dam, series= "stlf", PI=F)+autolayer(holt.dam, series="holt", PI=F)+
  autolayer(ets.cars.fc, series="ets", PI=F)

## From the autoplot, it looks like that holt's linear method seems more likely than other two methods. 

# (h)
checkresiduals(holt.dam)

## Warning in checkresiduals(holt.dam): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,A,N)
## Q* = 24.992, df = 4, p-value = 5.05e-05
## 
## Model df: 4.   Total lags used: 8

Question 11

# (a)
head(visitors)

##        May   Jun   Jul   Aug   Sep   Oct
## 1985  75.7  75.4  83.1  82.9  77.3 105.7

autoplot(visitors)

## The plot shows an increasing trend and seasonality with a frequency of one year. 

# (b)
train.vis <- window(visitors, start=c(1985,5), end = c(2003,4))
test.vis <- window(visitors, end = c(2005,4))

hw.multi <- hw(train.vis, h=24, seasonal ="multiplicative")
hw.multi

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2003       293.5512 272.0790 315.0235 260.7123 326.3902
## Jun 2003       311.3817 286.3466 336.4168 273.0939 349.6695
## Jul 2003       379.6865 346.4414 412.9316 328.8425 430.5305
## Aug 2003       341.5672 309.2321 373.9023 292.1150 391.0195
## Sep 2003       330.3820 296.7647 363.9994 278.9688 381.7953
## Oct 2003       371.9441 331.4615 412.4267 310.0313 433.8569
## Nov 2003       387.1607 342.2705 432.0509 318.5070 455.8144
## Dec 2003       471.3185 413.3054 529.3316 382.5951 560.0419
## Jan 2004       348.1741 302.8173 393.5309 278.8068 417.5414
## Feb 2004       390.3863 336.7055 444.0672 308.2885 472.4841
## Mar 2004       377.6150 322.9349 432.2951 293.9890 461.2410
## Apr 2004       337.3049 285.9782 388.6316 258.8075 415.8024
## May 2004       284.8762 239.4088 330.3437 215.3398 354.4127
## Jun 2004       302.1571 251.6621 352.6521 224.9316 379.3825
## Jul 2004       368.4105 304.0470 432.7739 269.9751 466.8459
## Aug 2004       331.3981 270.9579 391.8384 238.9627 423.8335
## Sep 2004       320.5215 259.5777 381.4653 227.3160 413.7270
## Oct 2004       360.8154 289.3783 432.2525 251.5618 470.0690
## Nov 2004       375.5478 298.2123 452.8832 257.2733 493.8222
## Dec 2004       457.1458 359.3352 554.9564 307.5574 606.7342
## Jan 2005       337.6781 262.6845 412.6717 222.9853 452.3709
## Feb 2005       378.5881 291.3958 465.7805 245.2390 511.9373
## Mar 2005       366.1740 278.7938 453.5542 232.5375 499.8105
## Apr 2005       327.0594 246.2591 407.8596 203.4861 450.6326

# (c)
## From the autoplot, it shows that seasonal variation is proportional to the level of the time series. Therefore, it is needed to use multiplicative seasonality. 

# (d)
ets.vis <- ets(train.vis)
ets.vis ## ETS(M,Ad,M)

## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = train.vis) 
## 
##   Smoothing parameters:
##     alpha = 0.6396 
##     beta  = 8e-04 
##     gamma = 1e-04 
##     phi   = 0.9799 
## 
##   Initial states:
##     l = 87.7602 
##     b = 3.0639 
##     s = 0.9392 1.0538 1.0792 0.9675 1.3275 1.091
##            1.0329 0.8867 0.9399 1.0231 0.8488 0.8103
## 
##   sigma:  0.0539
## 
##      AIC     AICc      BIC 
## 2300.157 2303.629 2360.912

ets.mam <- forecast(ets.vis, h =24)
# ETS(AAA)
ets.vis.aaa <- ets(model="AAA", BoxCox(train.vis, lambda = BoxCox.lambda(train.vis)))
ets.aaa <- forecast(ets.vis.aaa, h=24)
# SNaive
naive.vis <- snaive(train.vis, h =24)

# STL decomposition Box-Cox 
lambda <- BoxCox.lambda(train.vis)
box.train <- BoxCox(train.vis, lambda)
stl.m <- stl(box.train, s.window='periodic', robust=T)

autoplot(stl.m) +
  ggtitle('STL Decomposition')

box.train.adj <- train.vis - stl.m$time.series[,'seasonal']

ets.m <- ets(box.train.adj)
summary(ets.m)

## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = box.train.adj) 
## 
##   Smoothing parameters:
##     alpha = 0.6367 
##     beta  = 1e-04 
##     gamma = 1e-04 
##     phi   = 0.98 
## 
##   Initial states:
##     l = 88.0983 
##     b = 3.0013 
##     s = 0.9419 1.0561 1.0761 0.9701 1.3136 1.0893
##            1.0269 0.8922 0.9401 1.0215 0.854 0.8182
## 
##   sigma:  0.0537
## 
##      AIC     AICc      BIC 
## 2298.417 2301.889 2359.172 
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.8655531 14.43875 10.53325 0.1298963 3.978464 0.4037624
##                     ACF1
## Training set -0.05100681

fc.ets <- forecast(ets.m, h=24)
fc.ets

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2003       291.6205 271.5537 311.6872 260.9310 322.3099
## Jun 2003       304.3876 279.5465 329.2287 266.3964 342.3788
## Jul 2003       364.1297 330.3853 397.8741 312.5221 415.7373
## Aug 2003       335.1671 300.7929 369.5413 282.5963 387.7379
## Sep 2003       318.1115 282.6153 353.6076 263.8248 372.3982
## Oct 2003       366.1690 322.2496 410.0883 299.0001 433.3378
## Nov 2003       388.4597 338.8278 438.0915 312.5543 464.3651
## Dec 2003       468.4604 405.1498 531.7710 371.6352 565.2856
## Jan 2004       345.9877 296.8047 395.1707 270.7687 421.2067
## Feb 2004       383.8303 326.7018 440.9589 296.4597 471.2010
## Mar 2004       376.7345 318.2486 435.2203 287.2881 466.1808
## Apr 2004       336.0082 281.7751 390.2413 253.0658 418.9506
## May 2004       291.9302 243.0766 340.7838 217.2150 366.6454
## Jun 2004       304.7044 251.9627 357.4461 224.0429 385.3659
## Jul 2004       364.5011 299.3799 429.6223 264.9068 464.0954
## Aug 2004       335.5020 273.7481 397.2559 241.0576 429.9465
## Sep 2004       318.4230 258.1384 378.7076 226.2257 410.6203
## Oct 2004       366.5204 295.2523 437.7884 257.5253 475.5155
## Nov 2004       388.8250 311.2767 466.3732 270.2251 507.4248
## Dec 2004       468.8921 373.0860 564.6981 322.3693 615.4148
## Jan 2005       346.3001 273.8893 418.7109 235.5574 457.0428
## Feb 2005       384.1700 302.0453 466.2946 258.5711 509.7688
## Mar 2005       377.0611 294.7294 459.3928 251.1457 502.9766
## Apr 2005       336.2937 261.3533 411.2341 221.6822 450.9051

# (e)
rmse(test.vis, ets.mam$mean)

## [1] 80.23124

rmse(test.vis, ets.aaa$mean)

## [1] 415.0766

rmse(test.vis, naive.vis$mean)

## [1] 50.30097

rmse(test.vis, fc.ets$mean)

## [1] 80.87764

checkresiduals(naive.vis)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

## Seasonal naive model gives the best forecasts in terms of low rmse. In addition, it is not having issue with white noise. 

# (f) #######################################Redo 
fets_add_BoxCox <- function(y, h) {
 forecast(ets(
  y,
  lambda = BoxCox.lambda(y),
  additive.only = TRUE
 ),
 h = h)
}
fstlm <- function(y, h) {
 forecast(stlm(
  y, 
  lambda = BoxCox.lambda(y),
  s.window = frequency(y) + 1,
  robust = TRUE,
  method = "ets"
 ),
 h = h)
}
fets <- function(y, h) {
 forecast(ets(y),
          h = h)
}
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))

## [1] 32.78675

sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,    ###has problem
          na.rm = TRUE))

## [1] 18.86439

sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
          na.rm = TRUE))

## [1] 17.49642

sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))

## [1] 18.52985

sqrt(mean(tsCV(visitors, hw, h = 1, #has porblem
               seasonal = "multiplicative")^2,
          na.rm = TRUE))

## [1] 19.62107

##

Question 12

fets <- function(y, h) {
  forecast(ets(y), h = h)
}

# (a)
head(qcement)

##       Qtr1  Qtr2  Qtr3  Qtr4
## 1956 0.465 0.532 0.561 0.570
## 1957 0.529 0.604

ets.cv <- tsCV(qcement, fets, h = 4)
snaive.cv <- tsCV(qcement, snaive, h = 4)

# (b)
mean(ets.cv^2,na.rm=T)

## [1] 0.01250954

mean(snaive.cv^2,na.rm=T)

## [1] 0.01792147

## It looks that ets model is more accurate than seasonal naive. ##########

Question 13

head(ausbeer)

##      Qtr1 Qtr2 Qtr3 Qtr4
## 1956  284  213  227  308
## 1957  262  228

head(bricksq)

##      Qtr1 Qtr2 Qtr3 Qtr4
## 1956  189  204  208  197
## 1957  187  214

head(dole)

##       Jan  Feb  Mar  Apr  May  Jun
## 1956 4742 6128 6494 5379 6011 7003

head(a10)

##           Jul      Aug      Sep      Oct      Nov      Dec
## 1991 3.526591 3.180891 3.252221 3.611003 3.565869 4.306371

head(usmelec)

##          Jan     Feb     Mar     Apr     May     Jun
## 1973 160.218 143.539 148.158 139.589 147.395 161.244

# ausbeer 
ausbeer.train <- window(ausbeer, end = c(2007,2))
ausbeer.test <- window(ausbeer, start = c(2007,3))

#ets
ets.aus <- forecast(ets(ausbeer.train), h = 12)

#snaive
snaive.aus <- snaive(ausbeer.train,  h = 12)

#stlf
stlf.aus <- stlf(ausbeer.train, h = 12, lambda = BoxCox.lambda(ausbeer.train))

forecast::accuracy(ets.aus, ausbeer.test)

##                      ME      RMSE       MAE          MPE     MAPE      MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set      0.1272571  9.620828  8.919347  0.009981598 2.132836 0.5633859
##                    ACF1 Theil's U
## Training set -0.1942276        NA
## Test set      0.3763918 0.1783972

forecast::accuracy(snaive.aus, ausbeer.test)

##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  3.336634 19.67005 15.83168  0.9044009 3.771370 1.0000000
## Test set     -2.916667 10.80509  9.75000 -0.6505927 2.338012 0.6158537
##                      ACF1 Theil's U
## Training set 0.0009690785        NA
## Test set     0.3254581810 0.1981463

forecast::accuracy(stlf.aus, ausbeer.test)

##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  0.5421438 13.330464 10.237474  0.1268498 2.454628 0.6466447
## Test set     -2.5989512  9.790852  8.939692 -0.6576791 2.153152 0.5646710
##                    ACF1 Theil's U
## Training set -0.1594160        NA
## Test set      0.2871443 0.1691628

## looks like they all have similar results; however, ets model has better performance based on the rmse metric. 

# bricksq
bricksq.train <- window(bricksq, end = c(1991,1))
bricksq.test <- window(bricksq, start = c(1991,2))

ets.bricksq <- forecast(ets(bricksq.train), h = 36)

snaive.bricksq <- snaive(bricksq.train,  h = 36)

stlf.bricksq <- stlf(bricksq.train, h = 36, lambda = BoxCox.lambda(bricksq.train))

forecast::accuracy(ets.bricksq, bricksq.test)

##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.420627 21.77283 15.37691 -0.1306659 3.788064 0.4254112
## Test set      9.185507 17.16519 15.11577  1.9865479 3.404188 0.4181867
##                   ACF1 Theil's U
## Training set 0.1682149        NA
## Test set     0.3082903 0.4933751

forecast::accuracy(snaive.bricksq, bricksq.test)

##                      ME     RMSE      MAE       MPE      MAPE     MASE
## Training set   7.708029 49.66883 36.14599  1.724113  8.858339 1.000000
## Test set     -32.857143 53.61103 44.71429 -7.422967 10.329959 1.237047
##                    ACF1 Theil's U
## Training set  0.7956728        NA
## Test set     -0.1296336  1.491617

forecast::accuracy(stlf.bricksq, bricksq.test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE      ACF1
## Training set  1.217074 22.49498 16.21792 0.3352736 3.895654 0.4486784 0.1769356
## Test set     25.355750 32.76304 28.16652 5.6133717 6.287438 0.7792435 0.5383942
##              Theil's U
## Training set        NA
## Test set     0.9328939

## Based on the error metrics, ets model has shown an dominated performance rmse than other two models. 

# dole
dole.train <- window(dole, end = c(1989,7))
dole.test <- window(dole, start = c(1989,8))

ets.dole <- forecast(ets(dole.train), h = 36)

snaive.dole<- snaive(dole.train,  h = 36)

stlf.dole <- stlf(dole.train, h = 36, lambda = BoxCox.lambda(dole.train))

forecast::accuracy(ets.dole, dole.test)

##                        ME      RMSE        MAE        MPE     MAPE      MASE
## Training set     98.00253  16130.58   9427.497  0.5518769  6.20867 0.2995409
## Test set     221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
##                   ACF1 Theil's U
## Training set 0.5139536        NA
## Test set     0.9394267  11.29943

forecast::accuracy(snaive.dole, dole.test)

##                     ME      RMSE       MAE       MPE     MAPE     MASE
## Training set  12606.58  56384.06  31473.16  3.350334 27.73651 1.000000
## Test set     160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
##                   ACF1 Theil's U
## Training set 0.9781058        NA
## Test set     0.9208325  9.479785

forecast::accuracy(stlf.dole, dole.test)

##                       ME       RMSE        MAE        MPE      MAPE      MASE
## Training set    137.7633   6019.681   3490.508  0.2754407  3.674711 0.1109043
## Test set     204490.0015 267551.477 206293.106 29.1688279 29.632258 6.5545727
##                    ACF1 Theil's U
## Training set -0.0792675        NA
## Test set      0.9382601  10.65525

## snaive model has better performance at all error metrics. 


# a10
a10.train <- window(a10, end = c(2005,6))
a10.test <- window(a10, start = c(2005,7))

ets.a10<- forecast(ets(a10.train ), h = 36)

snaive.a10 <- snaive(a10.train ,  h = 36)

stlf.a10<- stlf(a10.train, h = 36, lambda = BoxCox.lambda(a10.train ))

forecast::accuracy(ets.a10, a10.test)

##                      ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.04378049 0.488989 0.3542285 0.2011915 3.993267 0.3611299
## Test set     1.62559331 2.562046 2.0101563 7.0011207 9.410027 2.0493197
##                     ACF1 Theil's U
## Training set -0.05238173        NA
## Test set      0.23062972 0.6929693

forecast::accuracy(snaive.a10, a10.test)

##                     ME     RMSE       MAE      MPE     MAPE     MASE      ACF1
## Training set 0.9577207 1.177528 0.9808895 10.86283 11.15767 1.000000 0.3779746
## Test set     4.3181585 5.180738 4.3306974 19.80542 19.89352 4.415071 0.6384822
##              Theil's U
## Training set        NA
## Test set      1.383765

forecast::accuracy(stlf.a10, a10.test)

##                       ME      RMSE      MAE         MPE     MAPE      MASE
## Training set 0.009379657 0.4034157 0.298922 -0.04447829 3.490468 0.3047458
## Test set     1.170547699 2.1254012 1.776014  4.62679885 8.426817 1.8106159
##                    ACF1 Theil's U
## Training set -0.1371340        NA
## Test set      0.1693221 0.5694937

## stlf model has outperformed others. 

# h02
h02.train <- window(h02, end = c(2005,7))
h02.test <- window(h02, start = c(2005,8))

ets.h02 <- forecast(ets(h02.train), h = 36)

snaive.h02 <- snaive(h02.train,  h = 36)

stlf.h02 <- stlf(h02.train, h = 36, lambda = BoxCox.lambda(h02.train))

forecast::accuracy(ets.h02, h02.test)

##                        ME       RMSE        MAE        MPE     MAPE      MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set     0.0342751950 0.08019185 0.06763825  2.8172154 7.267771 1.1341883
##                     ACF1 Theil's U
## Training set  0.05057522        NA
## Test set     -0.12581913 0.4581788

forecast::accuracy(snaive.h02, h02.test)

##                        ME       RMSE        MAE        MPE     MAPE     MASE
## Training set  0.037749040 0.07171709 0.05963581  5.0942482 8.197262 1.000000
## Test set     -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182 1.138231
##                    ACF1 Theil's U
## Training set 0.40080565        NA
## Test set     0.09840611 0.4929215

forecast::accuracy(stlf.h02, h02.test)

##                      ME       RMSE        MAE        MPE     MAPE      MASE
## Training set 0.00120577 0.03829965 0.02889214 -0.1362664 3.907902 0.4844764
## Test set     0.04481819 0.07945414 0.06413379  3.9816962 6.832377 1.0754241
##                    ACF1 Theil's U
## Training set -0.1524301        NA
## Test set     -0.1620946 0.4519207

## stlf model gives a relatively better forecasts based on its lowest rmse. 

# usmelec
usmelec.train <- window(usmelec, end = c(2010,6))
usmelec.test <- window(usmelec, start = c(2010,7))

ets.usmelec <- forecast(ets(usmelec.train), h = 36)

snaive.usmelec <- snaive(usmelec.train,  h = 36)

stlf.usmelec<- stlf(usmelec.train, h = 36, lambda = BoxCox.lambda(usmelec.train))

forecast::accuracy(ets.usmelec, usmelec.test)

##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  -0.07834851  7.447167  5.601963 -0.1168709 2.163495 0.6225637
## Test set     -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
##                   ACF1 Theil's U
## Training set 0.2719249        NA
## Test set     0.4679496 0.4859495

forecast::accuracy(snaive.usmelec, usmelec.test)

##                    ME     RMSE       MAE      MPE     MAPE     MASE      ACF1
## Training set 4.921564 11.58553  8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set     5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
##              Theil's U
## Training set        NA
## Test set     0.4765145

forecast::accuracy(stlf.usmelec, usmelec.test)

##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set  -0.03002591  6.198606  4.648595 -0.01658299 1.799262 0.5166129
## Test set     -18.99138915 23.249466 19.962853 -5.77781982 6.025763 2.2185343
##                    ACF1 Theil's U
## Training set 0.06453086        NA
## Test set     0.60201957 0.7417298

## ets model has better performance than others based on its overall low error metrics.

Question 14

# (a)

#bicoal
autoplot(forecast(ets(bicoal)))

#chicken
autoplot(forecast(ets(chicken)))

#dole
autoplot(forecast(ets(dole)))

#usdeaths
autoplot(forecast(ets(usdeaths)))

#lynx
autoplot(forecast(ets(lynx)))

#ibmclose
autoplot(forecast(ets(ibmclose)))

#eggs
autoplot(forecast(ets(eggs)))

## No, it does not give good consistent forecasts because the uniqueness of each time series. 

# (b)
## It looks like that ets model in bicoal, chicken, lynx, ibmclose and eggs have struggled to capture the seasonality which shows an flat line. However, it was not the case in dole and usdeaths.

Question 15

ets.usdeaths <- ets(usdeaths, model = "MAM")
fc.ets <- forecast(ets.usdeaths, h=1)
hw.usdeaths <- hw(usdeaths, h=1, seasonal = "multiplicative")
fc.ets

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1979       8233.107 7883.699 8582.515 7698.734 8767.481

hw.usdeaths

##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1979        8217.64 7845.352 8589.928 7648.274 8787.005

Question 16

#ets(ibmclose)
## chose ibmclose as its ets model gives A,N,N 

ibmclose.ets <- ets(ibmclose, model = "ANN")
ibmclose.ets

## ETS(A,N,N) 
## 
## Call:
##  ets(y = ibmclose, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 459.9339 
## 
##   sigma:  7.2637
## 
##      AIC     AICc      BIC 
## 3648.450 3648.515 3660.182

fc.ibm <- forecast(ibmclose.ets,h=1)
fc.ibm

##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 370       356.9995 347.6907 366.3083 342.7629 371.2361

# From the model, I obtain 
sigm <- 7.2637
alpha <- 0.9999 
h <- 2
fv <- sigm*(1+(alpha^2)*(h-1))

#Point forecast
fc.ibm$mean[1]

## [1] 356.9995

#95% confidence interval 
fc.ibm$lower[1,"95%"]

##      95% 
## 342.7629

fc.ibm$upper[1,"95%"]

##      95% 
## 371.2361

## I showed that the point forecast and confidence interval are the same as the model generated. Therefore, forecast variance is given by the formula.

Question 17

#L^hat T+h|T + 1.96*sigma^hat h
#L^hat T+h|T - 1.96*sigma^hat h

HW2 (Chapter 7)

Shibo Li

8/1/2021

Chapter 7

Question 1

Question 2

Question 3

Question 4

Question 5

Question 6

Question 7

Question 8

Question 9

Question 10

Question 11

Question 12

Question 13

Question 14

Question 15

Question 16

Question 17