library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff
## Loading required package: fma
## Loading required package: expsmooth
library(Metrics)
## Warning: package 'Metrics' was built under R version 3.6.1
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
library(tseries)
library(forecast)
#CH7) ----
#1) Consider the pigs series - the number of pigs slaughtered in Victoria each month.
autoplot(decompose(pigs))

# a. Use the ses() function in R to find the optimal values of alpha and l0
#    and generate forecasts for the next four months.

fit1 <- ses(pigs, h = 4)
summary(fit1)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.2971 
## 
##   Initial states:
##     l = 77260.0561 
## 
##   sigma:  10308.58
## 
##      AIC     AICc      BIC 
## 4462.955 4463.086 4472.665 
## 
## Error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249
##                    ACF1
## Training set 0.01282239
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8
l <- fit1$model$par[2] #initial level l0
a <- fit1$model$par[1] #smoothing parameter alpha

print(paste("alpha:", a, "l0:", l))
## [1] "alpha: 0.297148833372095 l0: 77260.0561458528"
# b. compute a 95% prediction interval for the first forecast using yhat +- 1.96s
#    where s is the standard deviation of the residuals.

#The 95% prediction intervals are included in the summary above and computed by hand below.
s <- sd(fit1$residuals)
#upper
fit1$mean[1] + 1.96*s
## [1] 118952.8
#lower
fit1$mean[1] - 1.96*s
## [1] 78679.97
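#For comparison (a sketch): the interval stored on the forecast object itself,
# which is built from the model's estimated sigma rather than the residual SD:
fit1$upper[1, '95%']
fit1$lower[1, '95%']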
#2) Write your own function to implement simple exponential smoothing. 
#   The function should take arguments y, alpha and level. It should return 
#   the forecast of the next observation in the series. Does it give the 
#   same forecast as ses()?

mySES <- function(y, alpha, level, h) {
 #NB: written for h = 1. yHat[1] holds l0, and the recursion starts at the
 # second observation, so the result differs slightly from ses().
 yHat <- level
 for(i in (1:length(y)) + h){ #i.e. (1+h):(length(y)+h)
  if(i <= length(y)){
   yHat[i] <- alpha*y[i] + (1-alpha)*yHat[i-1] #SES level update
  }
  else{ #forecasting step
   yHat[i] <- alpha*yHat[i-1] + (1-alpha)*yHat[i-2]
  }
 }
 return(yHat[length(y) + h]) #the forecast
}

print(paste('forecast h=1:',mySES(pigs,alpha = a, level = l, h =1)))
## [1] "forecast h=1: 98314.3452587773"
fit1$mean
##           Sep      Oct      Nov      Dec
## 1995 98816.41 98816.41 98816.41 98816.41
# The two forecasts are close but not identical: the loop above skips the first
# observation, so its final level differs slightly from ses()'s.
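#For reference, a minimal sketch of the textbook SES recursion, which starts the
# level at l0 and smooths every observation; it reproduces ses()'s point forecast
# (the forecast equals the final level for every horizon):
sesForecast <- function(y, alpha, l0) {
 level <- l0
 for (t in seq_along(y)) {
  level <- alpha*y[t] + (1 - alpha)*level #l_t = alpha*y_t + (1-alpha)*l_{t-1}
 }
 return(level)
}
sesForecast(pigs, alpha = a, l0 = l)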

#3) Modify your function from the previous exercise to return the sum of
#   squared errors rather than the forecast of the next observation. Then use
#   optim() function to find the optimal values of a and l0.
#   Do you get the same values as the ses() function?

mySES2 <- function(pars = c(alpha, l), y) {
 #Returns the in-sample sum of squared one-step errors for SES with
 # parameters pars = c(alpha, l0).
 error <- 0
 SSE <- 0
 alpha <- pars[1]
 l0 <- pars[2]
 y_hat <- l0
 
 for(index in 1:length(y)){
  error <- y[index] - y_hat #one-step forecast error
  SSE <- SSE + error^2
  
  y_hat <- alpha*y[index] + (1 - alpha)*y_hat #level update
 }
 
 return(SSE)
}
optParam <-optim(par = c(0.5, pigs[1]), y = pigs, fn = mySES2)
#Optimal parameters from my SES function: 
print(paste("Alpha:", optParam$par[1], "l0:", optParam$par[2]))
## [1] "Alpha: 0.299008094014243 l0: 76379.2653476235"
#Parameters from SES function
print(paste("Alpha:", a, "l0",l))
## [1] "Alpha: 0.297148833372095 l0 77260.0561458528"
# Both are relatively close to one another.
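#Sanity check (a sketch): the SSE surface is nearly flat around the optimum,
# so both parameter sets give almost the same in-sample SSE:
mySES2(c(a, l), y = pigs)      #parameters from ses()
mySES2(optParam$par, y = pigs) #parameters from optim()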

#4) Combine your previous two functions to produce a function which both
#   finds the optimal values of a and l0, and produces a forecast of the
#   next observation in the series.



mySES3 <- function(y){
 optParam <- optim(par= c(0.5, y[1]), y=y, fn = mySES2)
 a <- optParam$par[1]
 l <- optParam$par[2]
 preds <-mySES(y, a, l,1)
 return(preds)
}

mySES3(pigs)
## [1] 98308.8
ses(pigs, h = 1)$mean
##           Sep
## 1995 98816.41
#the predictions are relatively similar.

#5) Data set books contains the daily sales of paperback and hardcover
#    books at the same store. The task is to forecast the next four days' sales
#    for paperback and hardcover books.
# a) Plot the series and discuss the main features of the data
autoplot(books)

#There is a slight upward trend with time in both series. It would seem there is a
# slight negative correlation between the two series.
# b) Use the ses() function to forecast each series, and plot the forecasts.
autoplot(ses(books[,1],h =4 ))

 autoplot(ses(books[,2],h=4))

# c) Compute the RMSE values for the training data in each case
 rmse(books[,1],ses(books[,1])$fitted)
## [1] 33.63769
 rmse(books[,2],ses(books[,2])$fitted)
## [1] 31.93101
#6) a) Now apply Holt's linear method to the paperback and hardback series and compute
 # four-day forecasts in each case
 #Paperback
 holt(books[,1], h= 4)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       209.4668 166.6035 252.3301 143.9130 275.0205
## 32       210.7177 167.8544 253.5811 145.1640 276.2715
## 33       211.9687 169.1054 254.8320 146.4149 277.5225
## 34       213.2197 170.3564 256.0830 147.6659 278.7735
 #Hardcover
 holt(books[,2], h= 4)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334
 #b) Compare the RMSE measures of Holt's method for the two series to those of SES
 #   in the previous question. Discuss the merits of the two forecasting methods for these data.
 #Paperback
  rmse(books[,1],holt(books[,1])$fitted)
## [1] 31.13692
 #Hardcover
  rmse(books[,2],holt(books[,2])$fitted)
## [1] 27.19358
  #Holt's linear method outperforms SES on both series, as expected: both series
  # trend upward, and Holt adds a trend component that SES lacks.
  
 #C) Compare the forecasts for the two series using both methods. Which do you
  # think is best?
 #Paperback
  Paperback.ses <- ses(books[,1],h=4)
 #Hardcover
  Hardcover.ses <- ses(books[,2],h=4)
 #Paperback
  Paperback.h <- holt(books[,1], h= 4)
 #Hardcover
  Hardcover.h <- holt(books[,2], h= 4)

autoplot(books) +
 autolayer(Paperback.ses, series = 'SES Paperback') +
 autolayer(Hardcover.ses, series = 'SES Hardcover') +
 autolayer(Paperback.h, series = 'Holt Paperback') +
 autolayer(Hardcover.h, series = 'Holt Hardcover') 

#This plot shows that SES produces wider prediction intervals: the upper 95% bounds
# are approximately the same, but the SES lower bounds sit noticeably lower. This
# means Holt's linear method is more practical here.

# D) Calculate a 95% prediction interval for the first forecast for each series, using
# the RMSE values and assuming normal errors. Compare your intervals with those produced by R.

paperback.ses.rmse <- rmse(books[,1],Paperback.ses$fitted)
paperback.holt.rmse <- rmse(books[,1], Paperback.h$fitted)
hardcover.ses.rmse <- rmse(books[,2],Hardcover.ses$fitted)
hardcover.holt.rmse <- rmse(books[,2],Hardcover.h$fitted)

Paperback.ses$mean[1] + 1.96*paperback.ses.rmse
## [1] 273.0395
Paperback.ses$mean[1] - 1.96*paperback.ses.rmse
## [1] 141.1798
Paperback.ses$upper[1,'95%']
##      95% 
## 275.3523
Paperback.ses$lower[1,'95%']
##     95% 
## 138.867
Paperback.h$mean[1] + 1.96*paperback.holt.rmse
## [1] 270.4951
Paperback.h$mean[1] - 1.96*paperback.holt.rmse
## [1] 148.4384
Paperback.h$upper[1,'95%']
##      95% 
## 275.0205
Paperback.h$lower[1,'95%']
##     95% 
## 143.913
Hardcover.ses$mean[1] + 1.96*hardcover.ses.rmse
## [1] 302.1449
Hardcover.ses$mean[1] - 1.96*hardcover.ses.rmse
## [1] 176.9753
Hardcover.ses$upper[1,'95%']
##      95% 
## 304.3403
Hardcover.ses$lower[1,'95%']
##      95% 
## 174.7799
Hardcover.h$mean[1] + 1.96*hardcover.holt.rmse
## [1] 303.4733
Hardcover.h$mean[1] - 1.96*hardcover.holt.rmse
## [1] 196.8745
Hardcover.h$upper[1,'95%']
##      95% 
## 307.4256
Hardcover.h$lower[1,'95%']
##      95% 
## 192.9222
#The hand-made intervals (point forecast +- 1.96*RMSE) are slightly tighter than
# the intervals produced by ses() and holt().
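#Why (a sketch): ses() and holt() build their intervals from the model's estimated
# sigma, which is slightly larger than the in-sample RMSE used above:
sqrt(Paperback.ses$model$sigma2)
paperback.ses.rmse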

#7) For this exercise use data set eggs, the price of a dozen eggs in the 
# United States from 1900–1993. Experiment with the various options in the 
#holt() function to see how much the forecasts change with damped trend, 
#or with a Box-Cox transformation. Try to develop an intuition of what each 
#argument is doing to the forecasts.
# [Hint: use h=100 when calling holt() so you can clearly see the 
# differences between the various options when plotting the forecasts.]
# Which model gives the best RMSE?

fit1 <- holt(eggs,h=100)
fit2 <- holt(eggs,h=100, damped = T)
fit3 <- holt(eggs,h=100, damped = F, biasadj = T)
fit4 <- holt(eggs,h=100, damped = T, biasadj = T)
fit5 <- holt(eggs,h=100, damped = T, biasadj = T, lambda = 'auto')
fit6 <- holt(eggs,h=100, damped = F, biasadj = T, lambda = 'auto')
fit7 <- holt(eggs,h=100, damped = F, biasadj = F, lambda = 'auto')

autoplot(eggs) +
 autolayer(fit1, series = 'Base') +
 autolayer(fit2, series = 'Damped') +
 autolayer(fit3, series = 'BiasAdj') +
 autolayer(fit4, series = 'Damped + Biasadj') +
 autolayer(fit5, series = 'Damped + Biasadj + Lambda') +
 autolayer(fit6, series = 'Biasadj + Lambda') +
 autolayer(fit7, series = 'Lambda')

fit1.rmse <- rmse(eggs, fit1$fitted)
fit2.rmse <- rmse(eggs, fit2$fitted)
fit3.rmse <- rmse(eggs, fit3$fitted)
fit4.rmse <- rmse(eggs, fit4$fitted)
fit5.rmse <- rmse(eggs, fit5$fitted)
fit6.rmse <- rmse(eggs, fit6$fitted)
fit7.rmse <- rmse(eggs, fit7$fitted)

fit1.rmse
## [1] 26.58219
fit2.rmse
## [1] 26.54019
fit3.rmse
## [1] 26.58219
fit4.rmse
## [1] 26.54019
fit5.rmse
## [1] 26.58589
fit6.rmse
## [1] 26.38689
fit7.rmse
## [1] 26.39376
#The Box-Cox (lambda) model with bias adjustment and no damping (fit6) performed the best of all the models based on RMSE

#8) Recall your retail time series  data 
library(readxl)
retail <- read_excel("C:/Users/Joseph/Downloads/retail (1).xlsx", skip= 1)
retail.ts <- ts(retail[,"A3349873A"],frequency=12, start=c(1982,4))
# a) Why is multiplicative seasonality necessary for this series?
autoplot(retail.ts)

#it is clear from the chart that the seasonal effect has increased with time.
# b) Apply Holt-winters' multiplicative method to the data. Experiment with making the 
#  trend damped.
retail.hw.dm <- hw(retail.ts,seasonal = 'multiplicative', damped= T )
retail.hw.m <- hw(retail.ts,seasonal = 'multiplicative')
autoplot(retail.ts) +
 autolayer(retail.hw.dm,series = 'Damped + Multiplicative') +
 autolayer(retail.hw.m, series = 'Multiplicative') +
 autolayer(fitted(retail.hw.dm), series = 'DM Fitted') +
 autolayer(fitted(retail.hw.m), series = 'M Fitted')

# c) Compare the RMSE of the one-step forecasts from the two methods. 
#    Which do you prefer?
retail.hw.m.error <- tsCV(retail.ts, hw, h = 1, seasonal = "multiplicative")
retail.hw.dm.error <- tsCV(retail.ts, hw, h = 1, seasonal = "multiplicative", damped = TRUE)
sqrt(mean(retail.hw.m.error^2, na.rm = TRUE))
## [1] 14.72762
sqrt(mean(retail.hw.dm.error^2, na.rm = TRUE))
## [1] 14.94306
#The RMSEs are almost the same; damping mostly affects longer-horizon forecasts, so
# at h = 1 the two methods barely differ. For long-term forecasting a damped HW is preferable.

# d) Check that the residuals from the best method look like white noise.
checkresiduals(retail.hw.dm)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 42.932, df = 7, p-value = 3.437e-07
## 
## Model df: 17.   Total lags used: 24
#The residuals don't look like white noise: the Ljung-Box p-value is tiny, there are
# significant spikes in the ACF, and the variance changes over time.
# e) Now find the test set RMSE, while training the model to the end of 2010. Can you 
#    beat the seasonal naive approach?
retail.ts.train <- window(retail.ts, end = c(2010,12))
retail.ts.valid <- window(retail.ts, start = c(2011,1))
#Seasonally Naive method
seasNaive <- snaive(retail.ts.train)
seasNaive.acc <- forecast::accuracy(seasNaive,retail.ts.valid)
#RMSE of Seasonally Naive
seasNaive.acc[2,2]
## [1] 30
#HW damped
retail.hw <- hw(retail.ts.train, damped= T, biasadj = T)
retail.hw.acc <- forecast::accuracy(retail.hw, retail.ts.valid)
retail.hw.acc[2,2]
## [1] 32.02955
# I was unable to beat the seasonal naive model with the HW approach; the closest RMSE was 32.

#9) For the same retail data, try an STL decomposition applied to the Box-Cox transformed
# series, followed by ETS on the seasonally adjusted data. How does that compare with 
# your best previous forecasts on the test set?
myLambda <- BoxCox.lambda(retail.ts.train)
retail.bcSTL <-BoxCox(retail.ts.train, myLambda) %>%
 mstl(s.window = 13, robust = T)%>%
 forecast(h=36,lambda=myLambda)
## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj
## information not found, defaulting to FALSE.
retail.bcETS <-BoxCox(retail.ts.train, myLambda) %>%
 ets()%>%
 forecast(h=36,lambda=myLambda)
## Warning in forecast.ets(., h = 36, lambda = myLambda): biasadj information
## not found, defaulting to FALSE.
Metrics::rmse(retail.bcSTL$mean, retail.ts.valid)
## [1] 39.92529
Metrics::rmse(retail.bcETS$mean, retail.ts.valid)
## [1] 32.32293
#The ETS model performed better than the STL approach, but not as well as HW.

# 10) For this exercise use data set ukcars, the quarterly UK passenger vehicle production data from 1977Q1–2005Q1.
# a. Plot the data and describe the main features of the series.
plot(ukcars)

# There is a slight upward trend and a clear seasonal pattern.
# b. Decompose the series using STL and obtain the seasonally adjusted data.
ukcars %>% stl(t.window=13,s.window='periodic',robust = T)%>%autoplot()

ukcars.seasadj <- seasadj(ukcars %>% stl(t.window=13,s.window='periodic',robust = T))
# c. Forecast the next two years of the series using an additive damped trend 
#    method applied to the seasonally adjusted data.
ukcars.hw <- hw(ukcars.seasadj,h=8,seasonal = 'additive', damped = T)
autoplot(ukcars.hw)

# d. Forecast the next two years of the series using Holt’s linear method 
#    applied to the seasonally adjusted data (as before but with damped=FALSE).
ukcars.holt <- holt(ukcars.seasadj, h= 8, damped= F)
autoplot(ukcars.holt)

# e. Now use ets() to choose a seasonal model for the data.
ukcars.ets <- ets(ukcars.seasadj, damped = T)
summary(ukcars.ets)
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = ukcars.seasadj, damped = T) 
## 
##   Smoothing parameters:
##     alpha = 0.5696 
##     beta  = 2e-04 
##     phi   = 0.9143 
## 
##   Initial states:
##     l = 347.3368 
##     b = -8.8748 
## 
##   sigma:  25.7803
## 
##      AIC     AICc      BIC 
## 1275.493 1276.285 1291.857 
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE
## Training set 2.398058 25.2035 20.43629 0.2677076 6.536475 0.6660087
##                    ACF1
## Training set 0.03749911
#ETS(A,Ad,N) is chosen.
# f. Compare the RMSE of the ETS model with the RMSE of the models you 
#    obtained using STL decompositions. Which gives the better in-sample fits?
rmse(ukcars.ets$fitted, ukcars.seasadj)
## [1] 25.2035
rmse(ukcars.hw$fitted, ukcars.seasadj)
## [1] 25.18399
rmse(ukcars.holt$fitted, ukcars.seasadj)
## [1] 25.28237
#The damped Holt-Winters model performed best of the three, though by a small margin.
# g. Compare the forecasts from the three approaches. Which seems most reasonable?
ukcars.ets.forecast <- forecast(ukcars.ets, h = 8)
autoplot(ukcars.seasadj) +
 autolayer(ukcars.ets.forecast, series = "ETS")+
 autolayer(ukcars.hw$mean, series = "HW")+
 autolayer(ukcars.holt$mean, series = "Holt")

# All three produce fairly flat forecasts with little variation over the horizon.
# The damped Holt-Winters model still looks the most reasonable.
# h. Check the residuals of your preferred model.
checkresiduals(ukcars.hw)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' additive method
## Q* = 32.225, df = 3, p-value = 4.692e-07
## 
## Model df: 9.   Total lags used: 12
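#The Ljung-Box p-value is far below 0.05, so the residuals of the preferred model
# are not white noise; some autocorrelation remains.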
# 11) For this exercise use data set visitors, the monthly Australian short-term 
# overseas visitors data, May 1985–April 2005.
# a. Make a time plot of your data and describe the main features of the series.
autoplot(visitors)

# There is an upward trend, and the seasonal variation increases with time,
# indicating that a multiplicative model is appropriate.

# b. Split your data into a training set and a test set comprising the last 
#    two years of available data. Forecast the test set using Holt-Winters’
#    multiplicative method.
visitors.train <- window(visitors, end = c(2003,4))
visitors.valid <- window(visitors, start = c(2003,5))

visitors.hw <- hw(visitors.train, seasonal = 'multiplicative',h=24)
visitors.hw$mean
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 2003                                     293.5512 311.3817 379.6865
## 2004 348.1741 390.3863 377.6150 337.3049 284.8762 302.1571 368.4105
## 2005 337.6781 378.5881 366.1740 327.0594                           
##           Aug      Sep      Oct      Nov      Dec
## 2003 341.5672 330.3820 371.9441 387.1607 471.3185
## 2004 331.3981 320.5215 360.8154 375.5478 457.1458
## 2005
# c. Why is multiplicative seasonality necessary here?
# Multiplicative seasonality is required because the seasonal variation grows with the level of the series.
# d. Forecast the two-year test set using each of the following methods:
#    i. an ETS model;
visitors.ets <- ets(visitors.train)
visitors.ets.forecast <- forecast(visitors.ets, h = 24)
#   ii. an additive ETS model applied to a Box-Cox transformed series;
visitors.etsAdd <- ets(BoxCox(visitors.train, lambda = BoxCox.lambda(visitors.train)), model = "AAA")
visitors.etsAdd.forecast <- forecast(visitors.etsAdd, h =24)
#  iii. a seasonal naïve method;
visitors.snaive <- snaive(visitors.train, h = 24)
#   iv. an STL decomposition applied to the Box-Cox transformed data followed
#       by an ETS model applied to the seasonally adjusted (transformed) data.
visitors.BCseasadj<-seasadj(BoxCox(visitors.train, lambda = BoxCox.lambda(visitors.train)) %>% 
 stl(t.window=13,s.window='periodic',robust = T))
visitors.ets.BCSA <- ets(visitors.BCseasadj)
visitors.ets.BCSA.for <- forecast(visitors.ets.BCSA, h = 24)
# e. Which method gives the best forecasts? Does it pass the residual tests?
#ETS
rmse(visitors.ets.forecast$mean, visitors.valid)
## [1] 80.23124
checkresiduals(visitors.ets) 

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,M)
## Q* = 20.677, df = 7, p-value = 0.004278
## 
## Model df: 17.   Total lags used: 24
#The residual ACF looks reasonable, but the low Ljung-Box p-value (0.004) means the
# residuals are not quite white noise.

#ETS Additive (note: these forecasts are on the Box-Cox scale, so this RMSE against
# the untransformed test set is not directly comparable with the others)
rmse(visitors.etsAdd.forecast$mean, visitors.valid)
## [1] 415.0766
checkresiduals(visitors.etsAdd) 

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,Ad,A)
## Q* = 15.206, df = 7, p-value = 0.03344
## 
## Model df: 17.   Total lags used: 24
#The residuals look reasonable apart from two lag spikes; they aren't 100% white
# noise, but the Ljung-Box p-value is much higher than for the plain ETS model.

#Seasonally Naive
rmse(visitors.snaive$mean,visitors.valid)
## [1] 50.30097
checkresiduals(visitors.snaive)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24
#Residuals don't look like white noise; the lags go well outside the significance boundaries.

# ETS on STL seasonally adjusted Box-Cox data (again on the transformed scale, so not directly comparable)
rmse(visitors.ets.BCSA.for$mean, visitors.valid)
## [1] 414.6479
checkresiduals(visitors.ets.BCSA)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,A,N)
## Q* = 26.159, df = 20, p-value = 0.1606
## 
## Model df: 4.   Total lags used: 24
#Residuals look like white noise (p = 0.16) and the ACF has only two spikes,
# significantly better than the seasonal naive method.

#Overall: on the original scale the seasonal naive method actually had the lowest test
# RMSE (the two Box-Cox-based RMSEs above are on the transformed scale and not comparable),
# but its residuals are clearly not white noise. Balancing accuracy and diagnostics,
# the ETS model remains the most defensible choice.

# f. Compare the same four methods using time series cross-validation with the tsCV()
#   function instead of using a training and test set. 
#   Do you come to the same conclusions?
fets_add_BoxCox <- function(y, h) {
 forecast(ets(
  y,
  lambda = BoxCox.lambda(y),
  additive.only = TRUE
 ),
 h = h)
}
fstlm <- function(y, h) {
 forecast(stlm(
  y, 
  lambda = BoxCox.lambda(y),
  s.window = frequency(y) + 1,
  robust = TRUE,
  method = "ets"
 ),
 h = h)
}
fets <- function(y, h) {
 forecast(ets(y),
          h = h)
}
# Comparing the models using RMSE
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,
          na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
          na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1, 
               seasonal = "multiplicative")^2,
          na.rm = TRUE))
## [1] 19.62107
#Using tsCV() we get more informative results; the STL-based model clearly performs the best.

# 12) the fets() function below returns ETS forecasts.
fets <- function(y, h) {
 forecast(ets(y),
          h = h)
}

# a. Apply tsCV() for a forecast horizon of h = 4, for both ETS and seasonal naive
#    methods, to the qcement data

ets.tscv <- tsCV(qcement, fets, h=4)
sNaive.tscv <- tsCV(qcement, snaive, h= 4)

# b. Compute the MSE of the resulting 4-step-ahead errors.
#  (Hint: make sure you remove missing values.) Why are there missing
#  values? Comment on which forecasts are more accurate. Is this what 
#  you expected?

mean(ets.tscv^2,na.rm=T)
## [1] 0.01250954
mean(sNaive.tscv^2,na.rm=T)
## [1] 0.01792147
#The missing values occur where tsCV() cannot compute an error: at the end of the
# series there are no future observations left to score against. The ETS model is
# more accurate, which is what I would expect, as it uses more information than the
# naive benchmark; CV often shows non-naive approaches performing better.
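#By-horizon comparison (a sketch): with h = 4, tsCV() returns a matrix with one
# column per horizon, so the errors can also be compared horizon by horizon:
colMeans(ets.tscv^2, na.rm = TRUE)
colMeans(sNaive.tscv^2, na.rm = TRUE)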

# 13) Compare ets(), snaive() and stlf() on the following six time series.
#     For stlf(), you might need to use a Box-Cox transformation. Use a 
#     test set of three years to decide what gives the best forecasts. 
#     ausbeer, bricksq, dole, a10, h02, usmelec.

#Checking for BoxCox Transformation
plot(ausbeer) #Pass

plot(bricksq) #intermediate

plot(dole)  #Required

plot(a10) #intermediate - will perform better w/ multiplicative

plot(h02) #not required

plot(usmelec) #not required

#Making transformations
bricksq.BC <- BoxCox(bricksq, BoxCox.lambda(bricksq))
a10.BC <- BoxCox(a10, BoxCox.lambda(a10))

#validation sets
ausbeer.train <- window(ausbeer, end = c(2007,2))
ausbeer.test  <- window(ausbeer, start = c(2007,3))
bricksq.BC.train    <- window(bricksq.BC, end = c(1991,1))
bricksq.BC.test    <- window(bricksq.BC, start = c(1991,2))
dole.train <- window(dole, end=c(1989,7))
dole.test <- window(dole, start=c(1989,8))
a10.train <- window(a10.BC, end = c(2005,6))
a10.test <- window(a10.BC, start = c(2005,7))
h02.train <- window(h02, end = c(2005,7))
h02.test <- window(h02, start = c(2005,8))
usmelec.train <- window(usmelec, end = c(2010,6))
usmelec.test <- window(usmelec, start = c(2010,7))
#ETS
ausbeer.ets <- ets(ausbeer.train)
bricksq.BC.ets <- ets(bricksq.BC.train)
dole.ets <- ets(dole.train)
a10.ets <- ets(a10.train)
h02.ets <- ets(h02.train)
usmelec.ets <- ets(usmelec.train)
#ETS forecasts
ausbeer.etsF <- forecast(ausbeer.ets, h=12)
bricksq.BC.etsF <- forecast(bricksq.BC.ets, h=12)
dole.etsF <- forecast(dole.ets, h=36)
a10.etsF <- forecast(a10.ets, h=36)
h02.etsF <- forecast(h02.ets, h=36)
usmelec.etsF <- forecast(usmelec.ets, h = 36)

#snaive
ausbeer.sn <- snaive(ausbeer.train, h = 12)
bricksq.BC.sn <- snaive(bricksq.BC.train, h= 12)
dole.sn <- snaive(dole.train, h= 36)
a10.sn <- snaive(a10.train, h = 36)
h02.sn <- snaive(h02.train, h = 36)
usmelec.sn <- snaive(usmelec.train, h= 36)

#stlf
ausbeer.stlf <- stlf(ausbeer.train, h = 12)
bricksq.BC.stlf <- stlf(bricksq.BC.train, h= 12)
dole.stlf <- stlf(dole.train, h= 36)
a10.stlf <- stlf(a10.train, h = 36)
h02.stlf <- stlf(h02.train, h = 36)
usmelec.stlf <- stlf(usmelec.train, h= 36)

#RMSE
#Ausbeer
forecast::accuracy(ausbeer.etsF,ausbeer.test)
##                      ME      RMSE       MAE          MPE     MAPE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311
## Test set      0.1272571  9.620828  8.919347  0.009981598 2.132836
##                   MASE       ACF1 Theil's U
## Training set 0.7567076 -0.1942276        NA
## Test set     0.5633859  0.3763918 0.1783972
forecast::accuracy(ausbeer.sn,ausbeer.test)
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  3.336634 19.67005 15.83168  0.9044009 3.771370 1.0000000
## Test set     -2.916667 10.80509  9.75000 -0.6505927 2.338012 0.6158537
##                      ACF1 Theil's U
## Training set 0.0009690785        NA
## Test set     0.3254581810 0.1981463
forecast::accuracy(ausbeer.stlf,ausbeer.test)
##                      ME     RMSE       MAE        MPE     MAPE      MASE
## Training set  0.6449182 13.77953 10.641492  0.1643171 2.552590 0.6721643
## Test set     -3.9074898 10.13010  8.597595 -0.9365677 2.089001 0.5430626
##                    ACF1 Theil's U
## Training set -0.1469508        NA
## Test set      0.3443458 0.1777939
# The models had similar performance, but based on test-set RMSE, ETS performed the best.
#bricksq
forecast::accuracy(bricksq.BC.etsF,bricksq.BC.test)
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set     0.20641501 0.2854117 0.2458899 1.4176042 1.692439 0.6062480
##                   ACF1 Theil's U
## Training set 0.1434416        NA
## Test set     0.3798615  0.793706
forecast::accuracy(bricksq.BC.sn,bricksq.BC.test)
##                      ME      RMSE       MAE        MPE     MAPE     MASE
## Training set  0.1051226 0.5448411 0.4055930  0.7368813 2.873281 1.000000
## Test set     -0.3353677 0.5781175 0.4975619 -2.2995568 3.439080 1.226752
##                     ACF1 Theil's U
## Training set  0.77776982        NA
## Test set     -0.05416358  1.488247
forecast::accuracy(bricksq.BC.stlf,bricksq.BC.test)
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.02095131 0.2281125 0.1675486 0.1506378 1.188061 0.4130953
## Test set     0.14549872 0.2641646 0.2239398 0.9999206 1.546620 0.5521294
##                   ACF1 Theil's U
## Training set 0.1966409        NA
## Test set     0.3346748 0.7433275
#STLF performed the best here, which makes sense given the Box-Cox transformation.

#dole
forecast::accuracy(dole.etsF,dole.test)
##                        ME      RMSE        MAE        MPE     MAPE
## Training set     98.00253  16130.58   9427.497  0.5518769  6.20867
## Test set     221647.42595 279668.65 221647.426 32.5447637 32.54476
##                   MASE      ACF1 Theil's U
## Training set 0.2995409 0.5139536        NA
## Test set     7.0424271 0.9394267  11.29943
forecast::accuracy(dole.sn,dole.test)
##                     ME      RMSE       MAE       MPE     MAPE     MASE
## Training set  12606.58  56384.06  31473.16  3.350334 27.73651 1.000000
## Test set     160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
##                   ACF1 Theil's U
## Training set 0.9781058        NA
## Test set     0.9208325  9.479785
forecast::accuracy(dole.stlf,dole.test)
##                       ME       RMSE        MAE        MPE      MAPE
## Training set    272.8467   6394.784   3898.066  0.5609317  4.972168
## Test set     203561.6017 266924.864 206434.081 29.0134356 29.748923
##                   MASE       ACF1 Theil's U
## Training set 0.1238537 0.02000331        NA
## Test set     6.5590519 0.93758582  10.63718
#in this instance the SNAIVE RMSE was the lowest.

#a10
forecast::accuracy(a10.etsF,a10.test)
##                        ME       RMSE        MAE          MPE     MAPE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162
## Test set     5.129718e-02 0.13739413 0.11893283  1.217786471 3.234877
##                   MASE        ACF1 Theil's U
## Training set 0.3212884 -0.04890245        NA
## Test set     0.7620674 -0.01633399 0.4906605
forecast::accuracy(a10.sn,a10.test)
##                     ME      RMSE       MAE      MPE     MAPE     MASE
## Training set 0.1523955 0.1731097 0.1560660 6.682003 6.842931 1.000000
## Test set     0.3393671 0.4025493 0.3406072 8.979124 9.018149 2.182456
##                   ACF1 Theil's U
## Training set 0.2212723        NA
## Test set     0.6073193  1.423661
forecast::accuracy(a10.stlf,a10.test)
##                         ME       RMSE        MAE         MPE     MAPE
## Training set -9.590892e-05 0.05937144 0.04677082 -0.04963522 2.090263
## Test set      5.634899e-02 0.13922876 0.11975457  1.35436706 3.248782
##                   MASE        ACF1 Theil's U
## Training set 0.2996861 -0.09757243        NA
## Test set     0.7673327  0.03711712 0.4935863
#ETS performed the best with the lowest RMSE

#h02
forecast::accuracy(h02.etsF,h02.test)
##                        ME       RMSE        MAE        MPE     MAPE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572
## Test set     0.0342751950 0.08019185 0.06763825  2.8172154 7.267771
##                   MASE        ACF1 Theil's U
## Training set 0.5745005  0.05057522        NA
## Test set     1.1341883 -0.12581913 0.4581788
forecast::accuracy(h02.sn,h02.test)
##                        ME       RMSE        MAE        MPE     MAPE
## Training set  0.037749040 0.07171709 0.05963581  5.0942482 8.197262
## Test set     -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182
##                  MASE       ACF1 Theil's U
## Training set 1.000000 0.40080565        NA
## Test set     1.138231 0.09840611 0.4929215
forecast::accuracy(h02.stlf,h02.test)
##                         ME       RMSE        MAE        MPE     MAPE
## Training set -9.933146e-05 0.04222490 0.03158308 -0.1504343 4.456942
## Test set     -2.785256e-04 0.07303399 0.06061427 -1.4831244 7.115336
##                   MASE        ACF1 Theil's U
## Training set 0.5295993 -0.06290209        NA
## Test set     1.0164072 -0.02827179 0.4494609
#STLF had the lowest RMSE.

#usmelec
forecast::accuracy(usmelec.etsF,usmelec.test)
##                        ME      RMSE       MAE        MPE     MAPE
## Training set  -0.07834851  7.447167  5.601963 -0.1168709 2.163495
## Test set     -10.20080652 15.291359 13.123441 -3.1908161 3.926338
##                   MASE      ACF1 Theil's U
## Training set 0.6225637 0.2719249        NA
## Test set     1.4584491 0.4679496 0.4859495
forecast::accuracy(usmelec.sn,usmelec.test)
##                    ME     RMSE       MAE      MPE     MAPE     MASE
## Training set 4.921564 11.58553  8.998217 2.000667 3.511648 1.000000
## Test set     5.475972 17.20037 13.494750 1.391437 3.767514 1.499714
##                   ACF1 Theil's U
## Training set 0.4841860        NA
## Test set     0.2692544 0.4765145
forecast::accuracy(usmelec.stlf,usmelec.test)
##                        ME     RMSE       MAE         MPE     MAPE
## Training set  -0.06835845  6.32711  4.786752 -0.06755695 1.872096
## Test set     -12.88663019 17.45005 15.416271 -4.04251340 4.675476
##                   MASE      ACF1 Theil's U
## Training set 0.5319668 0.1180312        NA
## Test set     1.7132585 0.5374051 0.5586222
#ETS performed the best based on the test-set RMSE.

#Overall it makes sense to try different models as it is difficult to tell
# exactly which model will perform the best.

#14) a. Use ets() on the following series bicoal, chicken, dole, 
# usdeaths, lynx, ibmclose, eggs. Does it always give good forecasts?


#Bicoal
mean(tsCV(bicoal, fets, h=10)^2, na.rm = T)
## [1] 11565.88
mean(tsCV(bicoal, snaive, h=10)^2, na.rm = T)
## [1] 12081.3
#Ets performs better than SNaive here

#Chicken
mean(tsCV(chicken, fets, h=10)^2, na.rm = T)
## [1] 1339.194
mean(tsCV(chicken, snaive, h=10)^2, na.rm = T)
## [1] 1212.813
#Snaive performs better than ets here

#dole
mean(tsCV(dole, fets, h=10)^2, na.rm = T)
## [1] 2758010177
mean(tsCV(dole, snaive, h=10)^2, na.rm = T)
## [1] 5331238533
#ETS performs better than Snaive here

#usdeaths
mean(tsCV(usdeaths, fets, h=10)^2, na.rm = T)
## [1] 1789494
mean(tsCV(usdeaths, snaive, h=10)^2, na.rm = T)
## [1] 556867.2
#Snaive performs better than ets here

#lynx
mean(tsCV(lynx, fets, h=10)^2, na.rm = T)
## [1] 5928549
mean(tsCV(lynx, snaive, h=10)^2, na.rm = T)
## [1] 4777830
#Snaive performs better than ets here

#ibmclose
mean(tsCV(ibmclose, fets, h=10)^2, na.rm = T)
## [1] 346.7712
mean(tsCV(ibmclose, snaive, h=10)^2, na.rm = T)
## [1] 322.2696
#Snaive performs better than ets here

#eggs
mean(tsCV(eggs, fets, h=10)^2, na.rm = T)
## [1] 2257.868
mean(tsCV(eggs, snaive, h=10)^2, na.rm = T)
## [1] 2361.556
#ETS performs better than snaive here

# 15) Show that the point forecasts from an ETS(M,A,M) model are the 
# same as those obtained using Holt-Winters’ multiplicative method.
usdeaths.ets2 <- ets(usdeaths,model = "MAM")
usdeaths.ets2F <- forecast(usdeaths.ets2, h=1)
usdeaths.hwM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.ets2F$mean
##           Jan
## 1979 8233.107
usdeaths.hwM$mean
##          Jan
## 1979 8217.64
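#The point forecasts are essentially identical; the small difference comes only from
# the two functions arriving at slightly different parameter estimates, since the
# point-forecast equations of ETS(M,A,M) and HW multiplicative are the same.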
# 16) Show that the forecast variance for an ETS(A,N,N) model is given by
# sigma^2 * [1 + alpha^2 * (h - 1)]
ibmclose.ets2 <- ets(ibmclose, model = "ANN")
ibmclose.ets2.F <- forecast(ibmclose.ets2,1)
ibmclose.ets2.F
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 370       356.9995 347.6907 366.3083 342.7629 371.2361
mysigma <- 7.2637             #sigma reported by the fitted model above
alpha <- .9999^2              #alpha^2, using the estimated alpha of about 0.9999
h <- 2
ci <- mysigma*(1+alpha*(h-1)) #roughly 2*sigma, close to the 1.96*sigma half-width
ibmclose.ets2.F$mean[1]
## [1] 356.9995
ibmclose.ets2.F$mean[1] - ci
## [1] 342.4736
ibmclose.ets2.F$lower[1,'95%']
##      95% 
## 342.7629
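#A cleaner check (a sketch): the h-step 95% half-width for ETS(A,N,N) is
# 1.96 * sigma * sqrt(1 + alpha^2 * (h - 1)), which reduces to 1.96*sigma at h = 1:
sigmahat <- sqrt(ibmclose.ets2$sigma2)  #estimated sigma from the fitted model
alphahat <- ibmclose.ets2$par['alpha']  #estimated smoothing parameter
h <- 1
ibmclose.ets2.F$mean[1] - 1.96 * sigmahat * sqrt(1 + alphahat^2 * (h - 1))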
# 17) Write down 95% prediction intervals for an ETS(A,N,N) model as a function of
# l_T, alpha, h and sigma, assuming normally distributed errors.
# The point forecast at every horizon is the last level l_T, so the interval is:
# l_T +- 1.96 * sigma * sqrt(1 + alpha^2 * (h - 1))

#CH8----
#1) Figure 8.31 shows the ACFs for 36 random numbers, 360 random numbers and
# 1,000 random numbers.
# a.Explain the differences among these figures, do they all indicate that the
# data are white noise?

#Each of these series appears to be white noise; a few borderline lag spikes in x1
# and x2 would need a closer look to confirm. The critical bounds vary greatly
# between the charts, which is why x2 may look to have more cause for concern than
# the other series.

#b. Why are the critical values at different distances from the mean of zero?
#  Why are the autocorrelations different in each figure when they refer 
#  to white noise?

#The critical values lie at +-1.96/sqrt(T), so they sit at different distances from
# zero: as the series length T grows the bounds shrink. The autocorrelations differ
# between figures because they are sample estimates; with more data they scatter
# less around their true value of zero.
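#Illustration (a sketch): the +-1.96/sqrt(T) bounds shrink as T grows:
ggAcf(rnorm(36))
ggAcf(rnorm(360))
ggAcf(rnorm(1000))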

#2) A classic example of non-stationary series is the  daily closing 
# IBM stock price series (data set ibmclose). Use R to plot the daily 
# closing prices for IBM stock and the ACF and PACF. Explain how each 
# plot shows that the series is non-stationary and should be differenced.

autoplot(ibmclose)

acf(ibmclose)

pacf(ibmclose)

#The slowly decaying ACF shows strong autocorrelation at all lags, which indicates
# a non-stationary series that should be differenced. The PACF shows that it is
# really the most recent price driving the correlation: from lag 2 onward the
# partial autocorrelations are within the bounds.

#3) For the following series find an appropriate Box-Cox transformation
# and order of differencing in order to obtain stationary data.
library(urca)
#a. usnetelec
Box.test(usnetelec, lag = 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  usnetelec
## X-squared = 329.22, df = 10, p-value < 2.2e-16
#The low p-value suggests that the data is autocorrelated and should be differenced
usnetelec %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 1.464 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#The test statistic (1.464) exceeds all the critical values, so test the first difference:
usnetelec %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.1585 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# The test statistic looks much better and is below the 10pct crit val

#b. usgdp
Box.test(usgdp, lag =10, type = 'Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  usgdp
## X-squared = 2078.3, df = 10, p-value < 2.2e-16
#Low P, data is autocorrelated
usgdp %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 4.6556 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires a differencing
usgdp %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 1.7909 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#still requires a higher order of diff
usgdp %>% diff() %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.021 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#second order passes the test.

#c. mcopper
Box.test(mcopper, lag =10, type = 'Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  mcopper
## X-squared = 3819, df = 10, p-value < 2.2e-16
#Low P, data is autocorrelated
mcopper %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 6 lags. 
## 
## Value of test-statistic is: 5.01 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires a differencing
mcopper %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 6 lags. 
## 
## Value of test-statistic is: 0.1843 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#first order passes the test.

#d. enplanements
Box.test(enplanements, lag =10, type = 'Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  enplanements
## X-squared = 2122.7, df = 10, p-value < 2.2e-16
#Low P, data is autocorrelated
enplanements %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 5 lags. 
## 
## Value of test-statistic is: 4.4423 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires a differencing
enplanements %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 5 lags. 
## 
## Value of test-statistic is: 0.0086 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#first order passes the test.

#e. visitors
Box.test(visitors, lag =10, type = 'Ljung-Box')
## 
##  Box-Ljung test
## 
## data:  visitors
## X-squared = 1522.6, df = 10, p-value < 2.2e-16
#Low P, data is autocorrelated
visitors %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 4.6025 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires a differencing
visitors %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.0191 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#first order passes the test.

#4) For the enplanements data, write down the differences you chose 
# above using backshift operator notation.
# (1 - B) y_t

#5) For your retail data find the approprate order of differencing 
# after transformation if necessary
autoplot(retail.ts)

#no transformation needed.
retail.ts %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 5 lags. 
## 
## Value of test-statistic is: 5.9286 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires differencing
retail.ts %>% diff() %>% ur.kpss %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 5 lags. 
## 
## Value of test-statistic is: 0.0522 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#one order of differencing is required.
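#Cross-check (a sketch): forecast's ndiffs() and nsdiffs() automate this choice:
ndiffs(retail.ts)
nsdiffs(retail.ts)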

#6) Use R to simulate and plot some data from simple ARIMA Models.
#a. Use the following R code to generate data from an AR(1) model with
# phi1 = 0.6 and sigma = 1. The process starts with y1 = 0.
y <- ts(numeric(100))
e <- rnorm(100)
quickAR <- function(phi, y, e){
for (i in 2:100){
 y[i] <- phi*y[i-1] + e[i] #AR(1): y_t = phi*y_{t-1} + e_t
}
return(y)
}
#b. produce a time plot for the series
myseries <- quickAR(.6,y,e)
autoplot(myseries)

myseries <- quickAR(.1,y,e)
autoplot(myseries)

myseries <- quickAR(1,y,e)
autoplot(myseries)

#As phi increases the series becomes smoother and more persistent; at phi = 1 it
# behaves like a random walk and wanders away from zero.
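#Cross-check (a sketch): stats::arima.sim() generates the same kind of series for a
# stationary AR(1) (it does not allow phi = 1):
autoplot(arima.sim(model = list(ar = 0.6), n = 100))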

#c. Write your own code to generate data from an MA(1) model with 
#theta = .6 and sigma = 1


quickMA <- function(theta, seed.nr){
 set.seed(seed.nr)
 y <- ts(numeric(100))
 e <- rnorm(100)
 for (i in 2:100){
  y[i] <- theta*e[i-1] + e[i] #MA(1): y_t = e_t + theta*e_{t-1}
 }
 return(autoplot(y))
}

#d. Produce a time plot for the series. How does the plot change as you change theta?
quickMA(.1,1)

quickMA(.5,1)

quickMA(1,1)

#Increasing theta raises the short-run (lag-1) correlation, much as phi does for the
# AR(1), but the MA(1) series stays stationary even at theta = 1.
#e. Generate data from an ARMA(1,1) model with phi = .6, theta = .6 and sigma = 1
quickARIMA <- function(n.obs, theta, seed.nr, phi){
 #Simulates an ARMA(1,1) process: y_t = phi*y_{t-1} + theta*e_{t-1} + e_t
 set.seed(seed.nr)
 y <- ts(numeric(n.obs))
 e <- rnorm(n.obs)
 for (i in 2:n.obs)
  y[i] <- phi*y[i-1] + theta*e[i-1] + e[i]
 return(autoplot(y))
}
quickARIMA(100,.6,1,.6)

# f. Generate data from an AR(2) model with phi1 = -.8, phi2 = -.3, and sigma =1

quickAR2 <- function(phi1, phi2, seed.nr){
 set.seed(seed.nr)
 y <- ts(numeric(100))
 e <- rnorm(100)
 for (i in 3:100){
  y[i] <- phi1*y[i-1] + phi2*y[i-2] + e[i] #AR(2): uses the two previous values
 }
 return(autoplot(y))
}
quickAR2(-.8,-.3,1)

# g. graph the latter two series and compare them
quickAR2(-.8,-.3,1)

quickARIMA(100,.6,1,.6)

#The ARMA(1,1) series is smoother and less volatile; the AR(2) series oscillates
# from one observation to the next because of the negative coefficients.

#7) Consider wmurders, the number of women murdered each year 
# (per 100,000 standard population) in the United States.
# a. By studying appropriate graphs of the series in R, find an 
# appropriate ARIMA(p,d,q) model for these data.
plot(wmurders)

#A couple of large spikes, so apply a Box-Cox transformation
wmurders.BC <- BoxCox(wmurders, BoxCox.lambda(wmurders))
acf(wmurders.BC)

pacf(wmurders.BC)

#The slowly decaying ACF and the PACF suggest that first differencing is required.
wmurders.BC %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.6745 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# requires differencing
wmurders.BC %>% diff() %>% ur.kpss %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.5466 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
wmurders.BC.diff <- diff(wmurders.BC)
acf(wmurders.BC.diff)

pacf(wmurders.BC.diff)

#The statistic (0.5466) is still above the 5% critical value, so second-order differencing is required
wmurders.BC.diff2 <- diff(wmurders.BC.diff)
wmurders.BC.diff2 %>% ur.kpss %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.0532 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
auto.arima(wmurders.BC)
## Series: wmurders.BC 
## ARIMA(1,2,1) 
## 
## Coefficients:
##           ar1     ma1
##       -0.3006  -0.786
## s.e.   0.1529   0.119
## 
## sigma^2 estimated as 0.002851:  log likelihood=80.37
## AIC=-154.74   AICc=-154.25   BIC=-148.83
#auto.arima() selects ARIMA(1,2,1), consistent with the second-order differencing found above.

#b. Should you include a constant in the model? Explain.
# No: with d = 2 a constant would induce a quadratic trend in the long-run
# forecasts, which is not appropriate here.

#c. Write this model in terms of the backshift operator
# (1 - phi_1*B)(1 - B)^2 y_t = (1 + theta_1*B)*e_t

#d. Fit the model using R and examine the residuals. Is the model satisfactory?
fit1 <- auto.arima(wmurders.BC)
checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,2,1)
## Q* = 10.378, df = 8, p-value = 0.2395
## 
## Model df: 2.   Total lags used: 10
#The model is satisfactory
#e. Forecast three times ahead. check your forecasts by hand to make 
# sure that you know how they have been calculated.
forecast(fit1,3)
##      Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## 2005      0.8727642 0.8043418 0.9411866 0.7681212 0.9774072
## 2006      0.8394207 0.7467510 0.9320904 0.6976947 0.9811468
## 2007      0.8050389 0.6833171 0.9267607 0.6188814 0.9911964
#f. Create a plot of the series with forecasts and prediction 
# intervals for the next three periods shown.
autoplot(forecast(fit1, 3))

#g. Does auto.arima() give the same model you have chosen?

#Yes, auto.arima() gives the same model I chose.

#8) Consider austa, the total international visitors to Australia 
# (in millions) for the period 1980-2015.
# a. Use auto.arima() to find an appropriate ARIMA model. What model 
# was selected. Check that the residuals look like white noise. 
# Plot forecasts for the next 10 periods.
fit2 <- auto.arima(austa)
checkresiduals(fit2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 2.297, df = 5, p-value = 0.8067
## 
## Model df: 2.   Total lags used: 7
autoplot(forecast(fit2,10))

#b. Plot forecasts from an ARIMA(0,1,1) model with no drift and compare to part a.
# Remove the MA term and plot again.
autoplot(forecast(arima(austa, c(0,1,1)), h = 10))

#flat prediction
autoplot(forecast(arima(austa, c(0,1,0)), h = 10))

#Without the MA term the prediction intervals are narrower.

#c. Plot forecasts from an ARIMA(2,1,3) model with drift. 
# Remove the constant and see what happens 
#(stats::arima() cannot include drift with d = 1, so an ARIMA(2,1,0) without a constant is fitted here)
autoplot(forecast(arima(austa, c(2,1,0)), h = 10))

#d. Plot forecasts from an ARIMA(0,0,1) model with a constant. 
# Remove the MA term and plot again.
autoplot(forecast(arima(austa, c(0,0,1)), h = 10))

#e. Plot forecasts from an ARIMA(0,2,1) model with no constant.
autoplot(forecast(arima(austa, c(0,2,1)), h = 10))

#9) For the usgdp series:
# a. if necessary, find a suitable Box-Cox transformation for the data;
autoplot(usgdp)

autoplot(BoxCox(usgdp,BoxCox.lambda(usgdp)))

usgdp.lambda <- BoxCox.lambda(usgdp) #the chosen Box-Cox lambda
# b. fit a suitable ARIMA model to the transformed data using auto.arima();
aARfit <- auto.arima(usgdp, lambda = usgdp.lambda)
summary(aARfit)
## Series: usgdp 
## ARIMA(2,1,0) with drift 
## Box Cox transformation: lambda= 0.366352 
## 
## Coefficients:
##          ar1     ar2   drift
##       0.2795  0.1208  0.1829
## s.e.  0.0647  0.0648  0.0202
## 
## sigma^2 estimated as 0.03518:  log likelihood=61.56
## AIC=-115.11   AICc=-114.94   BIC=-101.26
## 
## Training set error measures:
##                    ME    RMSE      MAE         MPE      MAPE      MASE
## Training set 1.195275 39.2224 29.29521 -0.01363259 0.6863491 0.1655687
##                     ACF1
## Training set -0.03824844
# c. try some other plausible models by experimenting with the orders chosen;
usgdp.arima1<-arima(usgdp, order=c(1,1,0))
checkresiduals(usgdp.arima1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 50.316, df = 7, p-value = 1.252e-08
## 
## Model df: 1.   Total lags used: 8
usgdp.arima2<-arima(usgdp, order=c(0,1,0))
checkresiduals(usgdp.arima2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 108.19, df = 8, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 8
usgdp.arima3<-arima(usgdp, order=c(2,1,0))
checkresiduals(usgdp.arima3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)
## Q* = 16.013, df = 6, p-value = 0.01368
## 
## Model df: 2.   Total lags used: 8
# d. choose what you think is the best model and check the residual diagnostics;
checkresiduals(aARfit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0) with drift
## Q* = 6.5772, df = 5, p-value = 0.254
## 
## Model df: 3.   Total lags used: 8
# e. produce forecasts of your fitted model. Do the forecasts look reasonable?
autoplot(forecast(aARfit, h= 10))

#the results look reasonable
# f. compare the results with what you would obtain using ets() (with no transformation).
usgdp.ets <- ets(usgdp)
checkresiduals(usgdp.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,A,N)
## Q* = 21.461, df = 4, p-value = 0.0002565
## 
## Model df: 4.   Total lags used: 8
autoplot(forecast(usgdp.ets,h=10))

#The prediction intervals are better behaved for the ARIMA model, and its residuals
# pass the Ljung-Box test (p = 0.254) while the ETS residuals do not (p = 0.00026).

#10) Consider austourists, the quarterly number of international 
# tourists to Australia for the period 1999–2010.
# a. Describe the time plot
tsdisplay(austourists)

#upward trend with a seasonal component
# b. What can you learn from the ACF graph?

#Autocorrelated at the first three lags, with spikes every 4th lag reflecting the quarterly seasonality.
# c. What can you learn from the PACF graph?

# The PACF of the quarterly Australian tourist data shows dependence on only a few
# past observations; the remaining seasonal pattern can be handled with differencing.

# d. Produce plots of the seasonally differenced data (1 - B^4) y_t. What do these
# graphs suggest?
autoplot(diff(austourists, lag=4, differences=1))

par(mfrow=c(1,2))
acf(diff(austourists,lag=4, differences=1))
pacf(diff(austourists,lag=4, differences=1))

#The ACF/PACF of the seasonally differenced series suggest an AR(1) together with a
# seasonal AR(1) component.
# e. Does auto.arima() give the same model that you choose? if not, 
# which model do you think is better?
auto.arima(austourists)
## Series: austourists 
## ARIMA(1,0,0)(1,1,0)[4] with drift 
## 
## Coefficients:
##          ar1     sar1   drift
##       0.4705  -0.5305  0.5489
## s.e.  0.1154   0.1122  0.0864
## 
## sigma^2 estimated as 5.15:  log likelihood=-142.48
## AIC=292.97   AICc=293.65   BIC=301.6
#Auto arima chooses the same model.
# f. Write the model in terms of the backshift operator, then without.
# With backshift: (1 - phi_1*B)(1 - Phi_1*B^4)(1 - B^4) y_t = c + e_t
# Without: y_t = c + y_{t-4} + phi_1*(y_{t-1} - y_{t-5}) + Phi_1*(y_{t-4} - y_{t-8})
#          - phi_1*Phi_1*(y_{t-5} - y_{t-9}) + e_t

#11) Consider usmelec,  the total net generation of electricity 
# (in billion kilowatt hours) by the U.S. electric industry (monthly 
# for the period January 1973 – June 2013). In general there are two 
# peaks per year: in mid-summer and mid-winter. 

# a. Examine the 12-month moving average of this series to see what 
# kind of trend is involved.
movAvg <- ma(usmelec, order=12)
plot(usmelec, col='gray', main="Electricity Net Generation",
     ylab="Billions of kilowatt hours (kWh)")
lines(movAvg)
#there appears to be an upward trend with time.

# b. Do the data need transforming? If so, find a suitable transformation.
#Due to the variation increasing with the level of the series, a Box-Cox transformation is appropriate.
lambda <- BoxCox.lambda(usmelec)
netusage <- BoxCox(usmelec, lambda=lambda)
autoplot(netusage)
plot(log(usmelec) - log(movAvg)) # Log transformation

plot(netusage - BoxCox(movAvg, lambda=lambda))   # Box-Cox transformation
#Box cox removes most of the variability.

# c. Are the data stationary? If not, find an appropriate differencing 
# which yields stationary data.
#No, the data are not stationary: there is a trend and strong seasonality, so
# seasonal differencing (and possibly a further first difference) is needed.

tsdisplay(diff(netusage, 1))

tsdisplay(diff(netusage,lag=12,1))

tsdisplay(diff(diff(netusage,lag=12,1)))

# adf.test(diff(netusage, 12))
kpss.test(diff(netusage,12))
## Warning in kpss.test(diff(netusage, 12)): p-value smaller than printed p-
## value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(netusage, 12)
## KPSS Level = 1.1122, Truncation lag parameter = 5, p-value = 0.01
# adf.test(diff(diff(netusage, 12)))
kpss.test(diff(diff(netusage,12)))
## Warning in kpss.test(diff(diff(netusage, 12))): p-value greater than
## printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(diff(netusage, 12))
## KPSS Level = 0.01669, Truncation lag parameter = 5, p-value = 0.1
#The unit root tests show the series is stationary after a seasonal difference followed by a first difference.

# d.Identify a couple of ARIMA models that might be useful in 
# describing the time series. Which of your models is the best 
# according to their AIC values?
test.arima <- function(t.series, params){
 #Fit one ARIMA(p,d,q)(P,D,Q) candidate and record its AIC
 order <- as.numeric(params[1:3])
 seasonal <- as.numeric(params[4:6])
 df <- data.frame(model=paste0("ARIMA(", 
                               paste0(params, collapse=","), 
                               ")"),
                  AIC=arima(t.series, 
                             order=order, 
                             seasonal=seasonal)$aic)
 return(df)
}

gridSearch <- expand.grid(c(0, 1, 2), #p
                          c(2), #d
                          c(0, 1), #q
                          c(1, 2), #P
                          c(1), #D
                          c(0, 1)  #Q
)
df.list <- apply(gridSearch, MARGIN=1, FUN=function(x) {test.arima(usmelec, x)})

df <- do.call(rbind, df.list)

df
##                 model      AIC
## 1  ARIMA(0,2,0,1,1,0) 3925.638
## 2  ARIMA(1,2,0,1,1,0) 3773.422
## 3  ARIMA(2,2,0,1,1,0) 3617.919
## 4  ARIMA(0,2,1,1,1,0) 3489.316
## 5  ARIMA(1,2,1,1,1,0) 3452.156
## 6  ARIMA(2,2,1,1,1,0) 3403.781
## 7  ARIMA(0,2,0,2,1,0) 3889.878
## 8  ARIMA(1,2,0,2,1,0) 3723.813
## 9  ARIMA(2,2,0,2,1,0) 3590.935
## 10 ARIMA(0,2,1,2,1,0) 3450.950
## 11 ARIMA(1,2,1,2,1,0) 3411.202
## 12 ARIMA(2,2,1,2,1,0) 3372.929
## 13 ARIMA(0,2,0,1,1,1) 3812.535
## 14 ARIMA(1,2,0,1,1,1) 3659.566
## 15 ARIMA(2,2,0,1,1,1) 3544.265
## 16 ARIMA(0,2,1,1,1,1) 3393.268
## 17 ARIMA(1,2,1,1,1,1) 3363.035
## 18 ARIMA(2,2,1,1,1,1) 3330.714
## 19 ARIMA(0,2,0,2,1,1) 3811.630
## 20 ARIMA(1,2,0,2,1,1) 3655.111
## 21 ARIMA(2,2,0,2,1,1) 3544.726
## 22 ARIMA(0,2,1,2,1,1) 3391.470
## 23 ARIMA(1,2,1,2,1,1) 3360.434
## 24 ARIMA(2,2,1,2,1,1) 3329.810
df[which.min(df$AIC),]
##                 model     AIC
## 24 ARIMA(2,2,1,2,1,1) 3329.81
# e. Estimate the parameters of your best model and do diagnostic 
# testing on the residuals. Do the residuals resemble white noise? 
# If not, try to find another ARIMA model which fits better.
fit1 <- arima(usmelec, order = c(2,2,1), seasonal = c(2,1,1))
checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,2,1)(2,1,1)[12]
## Q* = 60.543, df = 18, p-value = 1.671e-06
## 
## Model df: 6.   Total lags used: 24
# adf.test(fit$residuals)
kpss.test(fit1$residuals)
## Warning in kpss.test(fit1$residuals): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  fit1$residuals
## KPSS Level = 0.022049, Truncation lag parameter = 5, p-value = 0.1
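#Since the residuals fail the Ljung-Box test, an alternative worth trying (a sketch)
# is to let auto.arima() search on the Box-Cox scale chosen earlier:
# fit.alt <- auto.arima(usmelec, lambda = lambda)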
# f.Forecast the next 15 years of electricity generation by the U.S.
# electric industry. Get the latest figures from the EIA to check the
# accuracy of your forecasts.
library(readxl)
elecOver <- read_excel("C:/Users/Joseph/Downloads/Table_1.1_Primary_Energy_Overview.xlsx")
names(elecOver) <- c('month', 'elec')
elecOver.ts <- ts(elecOver$elec, start= c(1973,1), frequency = 12)
plot(elecOver.ts)

fcast <- forecast(fit1, h=15*12)
plot(fcast)
lines(elecOver.ts, col='red')

# g. Eventually, the prediction intervals are so wide that the 
# forecasts are not particularly useful. How many years of forecasts 
# do you think are sufficiently accurate to be usable?

# Based on the prediction intervals, roughly the next 5 years of forecasts
# look accurate enough to be usable; beyond that the intervals become too wide.
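# A minimal sketch of one way to quantify "usable": track how quickly the 95%
# prediction interval widens relative to the point forecast (reusing fcast
# from above; plot not shown here).
rel.width <- (fcast$upper[, "95%"] - fcast$lower[, "95%"]) / fcast$mean
plot(rel.width, ylab = "Relative width of 95% PI")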

#12) For the mcopper data:
# a. if necessary, find a suitable Box-Cox transformation for the data;
autoplot(mcopper)

movAvg <- ma(mcopper, order=12)
lambda <- BoxCox.lambda(mcopper)
netprices <- BoxCox(mcopper, lambda=lambda)
autoplot(netprices)

plot(log(mcopper) - log(movAvg)) # Log transformation

plot(netprices - BoxCox(movAvg, lambda=lambda))   # Box-Cox transformation

# The Box-Cox remainder looks the more stable of the two (closer to white
# noise), so the Box-Cox transformation is preferred.
# b. fit a suitable ARIMA model to the transformed data using auto.arima();
fit12 <- auto.arima(mcopper, lambda= lambda)
fit12
## Series: mcopper 
## ARIMA(0,1,1) 
## Box Cox transformation: lambda= 0.1919047 
## 
## Coefficients:
##          ma1
##       0.3720
## s.e.  0.0388
## 
## sigma^2 estimated as 0.04997:  log likelihood=45.05
## AIC=-86.1   AICc=-86.08   BIC=-77.43
# c. try some other plausible models by experimenting with the orders chosen;
df.list <- apply(gridSearch, MARGIN=1, FUN=function(x) 
 {test.arima(mcopper, x)})

df <- do.call(rbind, df.list)

df
##                 model      AIC
## 1  ARIMA(0,2,0,1,1,0) 6771.437
## 2  ARIMA(1,2,0,1,1,0) 6717.869
## 3  ARIMA(2,2,0,1,1,0) 6660.403
## 4  ARIMA(0,2,1,1,1,0) 6579.486
## 5  ARIMA(1,2,1,1,1,0) 6538.480
## 6  ARIMA(2,2,1,1,1,0) 6535.362
## 7  ARIMA(0,2,0,2,1,0) 6737.754
## 8  ARIMA(1,2,0,2,1,0) 6684.161
## 9  ARIMA(2,2,0,2,1,0) 6629.310
## 10 ARIMA(0,2,1,2,1,0) 6551.373
## 11 ARIMA(1,2,1,2,1,0) 6508.398
## 12 ARIMA(2,2,1,2,1,0) 6505.775
## 13 ARIMA(0,2,0,1,1,1) 6656.245
## 14 ARIMA(1,2,0,1,1,1) 6589.775
## 15 ARIMA(2,2,0,1,1,1) 6529.324
## 16 ARIMA(0,2,1,1,1,1) 6456.676
## 17 ARIMA(1,2,1,1,1,1) 6422.693
## 18 ARIMA(2,2,1,1,1,1) 6422.826
## 19 ARIMA(0,2,0,2,1,1) 6658.071
## 20 ARIMA(1,2,0,2,1,1) 6591.294
## 21 ARIMA(2,2,0,2,1,1) 6530.839
## 22 ARIMA(0,2,1,2,1,1) 6458.553
## 23 ARIMA(1,2,1,2,1,1) 6424.635
## 24 ARIMA(2,2,1,2,1,1) 6424.788
df[which.min(df$AIC),]
##                 model      AIC
## 17 ARIMA(1,2,1,1,1,1) 6422.693
# These AIC values are not directly comparable: the auto.arima fit is on the
# Box-Cox scale while the grid models were fit to the raw data. Still, the
# non-seasonal ARIMA(0,1,1) from auto.arima is far more parsimonious, and
# mcopper shows little seasonality.
# d. choose what you think is the best model and check the residual diagnostics;
checkresiduals(fit12)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)
## Q* = 22.913, df = 23, p-value = 0.4659
## 
## Model df: 1.   Total lags used: 24
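# The Ljung-Box p-value (0.47) indicates the residuals resemble white noise,
# so the ARIMA(0,1,1) model on the Box-Cox scale is adequate.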
# e. produce forecasts of your fitted model. Do the forecasts look reasonable?
autoplot(forecast(fit12,h=5*12))

# f. compare the results with what you would obtain using ets() 
#  (with no transformation).
fit12.ets <- ets(mcopper)
fit12.ets
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = mcopper) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.2141 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 265.0541 
##     b = -3.9142 
## 
##   sigma:  0.0632
## 
##      AIC     AICc      BIC 
## 8052.038 8052.189 8078.049
fit12
## Series: mcopper 
## ARIMA(0,1,1) 
## Box Cox transformation: lambda= 0.1919047 
## 
## Coefficients:
##          ma1
##       0.3720
## s.e.  0.0388
## 
## sigma^2 estimated as 0.04997:  log likelihood=45.05
## AIC=-86.1   AICc=-86.08   BIC=-77.43
# These AIC values cannot be compared directly: ETS and ARIMA are different
# model classes, and the ARIMA likelihood is computed on the Box-Cox scale.
# A cross-validated comparison is sketched below.
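# A sketch of a fairer comparison via one-step time-series cross-validation
# (farima and fets2 are hypothetical helper names, not from the text;
# refitting at every origin makes this slow, and results are not shown here):
farima <- function(y, h) forecast(auto.arima(y, lambda = BoxCox.lambda(y)), h = h)
fets2 <- function(y, h) forecast(ets(y), h = h)
mean(tsCV(mcopper, farima, h = 1)^2, na.rm = TRUE)
mean(tsCV(mcopper, fets2, h = 1)^2, na.rm = TRUE)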

# 13) Choose one of the following time series: hsales, auscafe, qauselec, qcement, qgas.
# a. Do the data need transforming? If so, find a suitable transformation.
autoplot(hsales)

movAvg <- ma(hsales, order=12)
lambda <- BoxCox.lambda(hsales)
netprices <- BoxCox(hsales, lambda=lambda)
autoplot(netprices)

plot(log(hsales) - log(movAvg)) # Log transformation

plot(netprices - BoxCox(movAvg, lambda=lambda))   # Box-Cox transformation

# The Box-Cox remainder again looks the more stable of the two, so the
# Box-Cox transformation is used here as well.
# b. Are the data stationary? If not, find an appropriate differencing which yields stationary data.

tsdisplay(diff(netprices, 1))

tsdisplay(diff(netprices,lag=12,1))

tsdisplay(diff(diff(netprices,lag=12,1)))

# adf.test(diff(netprices, 12))
kpss.test(diff(netprices,12))
## Warning in kpss.test(diff(netprices, 12)): p-value greater than printed p-
## value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(netprices, 12)
## KPSS Level = 0.087312, Truncation lag parameter = 5, p-value = 0.1
# adf.test(diff(diff(netprices, 12)))
kpss.test(diff(diff(netprices,12)))
## Warning in kpss.test(diff(diff(netprices, 12))): p-value greater than
## printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff(diff(netprices, 12))
## KPSS Level = 0.045168, Truncation lag parameter = 5, p-value = 0.1
# c. Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
# Redefine the grid-search helper to fit on the Box-Cox scale (using the
# lambda estimated above for hsales).
test.arima <- function(t.series, params){
 order <- as.numeric(params[1:3])
 seasonal <- as.numeric(params[4:6])
 df <- data.frame(model=paste0("ARIMA(", 
                               paste0(params, collapse=","), 
                               ")"),
                  AIC=forecast::Arima(t.series, 
                            order=order, 
                            seasonal=seasonal,
                            lambda=lambda)$aic)
 return(df)
}
df.list <- apply(gridSearch, MARGIN=1, FUN=function(x) 
{test.arima(hsales, x)})

df <- do.call(rbind, df.list)

df
##                 model        AIC
## 1  ARIMA(0,2,0,1,1,0)   12.16749
## 2  ARIMA(1,2,0,1,1,0)  -24.55333
## 3  ARIMA(2,2,0,1,1,0)  -92.34166
## 4  ARIMA(0,2,1,1,1,0) -143.71916
## 5  ARIMA(1,2,1,1,1,0) -203.36169
## 6  ARIMA(2,2,1,1,1,0) -208.96125
## 7  ARIMA(0,2,0,2,1,0)  -28.45613
## 8  ARIMA(1,2,0,2,1,0)  -62.80340
## 9  ARIMA(2,2,0,2,1,0) -129.10728
## 10 ARIMA(0,2,1,2,1,0) -183.02174
## 11 ARIMA(1,2,1,2,1,0) -242.98150
## 12 ARIMA(2,2,1,2,1,0) -250.21483
## 13 ARIMA(0,2,0,1,1,1) -149.36815
## 14 ARIMA(1,2,0,1,1,1) -187.35099
## 15 ARIMA(2,2,0,1,1,1) -256.30073
## 16 ARIMA(0,2,1,1,1,1) -307.75285
## 17 ARIMA(1,2,1,1,1,1) -363.80940
## 18 ARIMA(2,2,1,1,1,1) -369.73701
## 19 ARIMA(0,2,0,2,1,1) -147.43467
## 20 ARIMA(1,2,0,2,1,1) -185.44033
## 21 ARIMA(2,2,0,2,1,1) -254.33181
## 22 ARIMA(0,2,1,2,1,1) -305.78918
## 23 ARIMA(1,2,1,2,1,1) -361.91351
## 24 ARIMA(2,2,1,2,1,1) -367.78545
df[which.min(df$AIC),]
##                 model      AIC
## 18 ARIMA(2,2,1,1,1,1) -369.737
# d. Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
# Fit a parsimonious model near the grid minimum, plus auto.arima for comparison.
fit13.1 <- forecast::Arima(hsales, order = c(1,2,1), seasonal = c(1,1,1), lambda = lambda)
fit13.2 <- auto.arima(hsales)
checkresiduals(fit13.1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,2,1)(1,1,1)[12]
## Q* = 18.978, df = 20, p-value = 0.5233
## 
## Model df: 4.   Total lags used: 24
checkresiduals(fit13.2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 39.66, df = 21, p-value = 0.008177
## 
## Model df: 3.   Total lags used: 24
# The residuals of my model look closer to white noise than those of the
# auto.arima model (Ljung-Box p = 0.52 vs p = 0.008).
# e. Forecast the next 24 months of data using your preferred model.
fcast <- forecast(fit13.1, h=24)
# f. Compare the forecasts obtained using ets()
fArima <- function(y, h) {
 # fit on the rolling-origin subset y (not the full hsales series),
 # otherwise every tsCV fit would peek ahead of the forecast origin
 fit <- forecast::Arima(y, order = c(1,2,1), seasonal = c(1,1,1), lambda = lambda)
 return(forecast(fit, h=h))
}
mean(tsCV(hsales, fArima, h= 24)^2,na.rm=T)
## [1] 197.4602
mean(tsCV(hsales, fets_add_BoxCox, h=24)^2, na.rm=T)
## [1] 222.4153
# My ARIMA model gave the better result, with the lower cross-validated MSE
# (197.5 vs 222.4).
# 14) For the same time series you used in the previous exercise,
#  try using a non-seasonal model applied to the seasonally adjusted
#  data obtained from STL. The stlf() function will make the 
#  calculations easy (with method="arima"). Compare the forecasts with 
#  those obtained in the previous exercise. Which do you think is the 
#  best approach?
mean(tsCV(hsales, stlf, method ='arima', h= 24)^2, na.rm=T)
## [1] 155.2675
# The STL + ARIMA approach had a slightly lower cross-validated MSE (155.3),
# so it may be the better approach here; but as mentioned before, fitting and
# comparing several models is the most reliable method.
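# For reference, the stlf() forecasts themselves can be inspected directly
# (a quick sketch; plot not shown here):
autoplot(stlf(hsales, method = "arima", h = 24))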
# 15) For your retail time series:
# a. develop an appropriate seasonal ARIMA model;
fit15 <- auto.arima(retail.ts.train)
# b. compare the forecasts with those you obtained in earlier chapters;
retail.Arima <- forecast(fit15, h= 36)

Metrics::rmse(retail.bcSTL$mean, retail.ts.valid)
## [1] 39.92529
Metrics::rmse(retail.Arima$mean, retail.ts.valid)
## [1] 42.71018
# STL performs slightly better than the seasonal ARIMA on the held-out
# validation set.
# c. Obtain up-to-date retail data from the ABS website (Cat 8501.0, Table 11), 
# and compare your forecasts with the actual numbers. 
# How good were the forecasts from the various models?
library(readxl)
retail <- read_excel("C:/Users/Joseph/Downloads/8501011.xlsx", skip=1, sheet= 2)
retail.ts.test <- ts(retail[,"A3349873A"],frequency=12, start=c(1982,4))
Metrics::rmse(retail.bcSTL$mean, retail.ts.test)
## [1] 98.7482
Metrics::rmse(retail.Arima$mean, retail.ts.test)
## [1] 122.8322
Metrics::rmse(retail.bcETS$mean, retail.ts.valid)
## [1] 32.32293
# Note: the ETS RMSE above was computed against the validation set rather
# than the updated test data, so it is not directly comparable to the two
# figures above it. Against the actual ABS figures, the STL forecasts
# performed best (RMSE 98.7 vs 122.8).

# 16) Consider sheep, the sheep population of England and Wales from 1867–1939.
# a. Produce a time plot of the time series.
autoplot(sheep)

# b. Assume you decide to fit the following model:
# yt = yt-1 + phi1(yt-1 - yt-2) + phi2(yt-2 - yt-3) + phi3(yt-3 - yt-4) + et
# where et is a white noise series. What sort of ARIMA model is this (i.e., what are p, d, and q)?
# (yt - yt-1) - phi1(yt-1 - yt-2) - phi2(yt-2 - yt-3) - phi3(yt-3 - yt-4) = et
# (1 - B)yt - phi1*B(1- B)yt - phi2*B^2(1- B)yt - phi3*B^3(1- B)yt = et
# (1 - phi1*B - phi2*B^2 - phi3*B^3)(1 - B)yt = et
# It is ARIMA(3, 1, 0) model.

# c. By Examining the ACF and PACF of the differenced data, 
# explain why this model is appropriate
ggtsdisplay(diff(sheep))

# The ACF shows sinusoidally decaying autocorrelations, while the PACF shows
# significant spikes at lags 1 to 3 but none beyond lag 3. Therefore
# ARIMA(3, 1, 0) is appropriate.

# d. The last five values of the series are given below:
lst5 <- c(1648,1665,1627,1791,1797)
# The estimated phi1 = .42, phi2 = -.2, and phi3 = -.3. Without using the
# forecast function, calculate forecasts for the next three years (1940-42)
sheep.1940 = 1797 + 0.42*(1797 - 1791) -0.20*(1791 - 1627) - 0.30*(1627 - 1665)
sheep.1941 = sheep.1940 + 0.42*(sheep.1940 - 1797) -0.20*(1797 - 1791) - 0.30*(1791 - 1627)
sheep.1942 = sheep.1941 + 0.42*(sheep.1941 - sheep.1940) -0.20*(sheep.1940 - 1797) - 0.30*(1797 - 1791)
c(sheep.1940, sheep.1941, sheep.1942)
## [1] 1778.120 1719.790 1697.268
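# A minimal sketch that generalizes the hand calculation above: iterate the
# ARIMA(3,1,0) recursion yt = y(t-1) + sum(phi_i*(y(t-i) - y(t-i-1))) for h
# steps. (ar3_forecast is a hypothetical helper name, not from the exercise.)
ar3_forecast <- function(y, phi, h) {
 for (i in seq_len(h)) {
  n <- length(y)
  # next value = last value + weighted sum of the last three first differences
  y <- c(y, y[n] + sum(phi * (y[n:(n-2)] - y[(n-1):(n-3)])))
 }
 tail(y, h)
}
ar3_forecast(lst5, phi = c(0.42, -0.20, -0.30), h = 3)
# should reproduce 1778.120 1719.790 1697.268 from above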
# e. Now fit the model in R and obtain the forecasts using forecast. How are
# they different from yours? Why?
fc_sheep_arima.3.1.0 <- forecast(
 Arima(sheep, order = c(3, 1, 0)),
 h = 3
)
fc_sheep_arima.3.1.0$mean
## Time Series:
## Start = 1940 
## End = 1942 
## Frequency = 1 
## [1] 1777.996 1718.869 1695.985
# The hand-calculated forecasts are slightly larger than those from Arima(),
# and the gap widens with the horizon. This is because the fitted coefficients
# differ slightly from the rounded values given in the question, and each
# forecast feeds into the next, so the small differences compound over time.
ar1 <- fc_sheep_arima.3.1.0$model$coef[1]
ar2 <- fc_sheep_arima.3.1.0$model$coef[2]
ar3 <- fc_sheep_arima.3.1.0$model$coef[3]
sheep.1940.new = 1797 + ar1*(1797 - 1791) + ar2*(1791 - 1627) + ar3*(1627 - 1665)
sheep.1941.new = sheep.1940.new + ar1*(sheep.1940.new - 1797) + ar2*(1797 - 1791) + ar3*(1791 - 1627)
sheep.1942.new = sheep.1941.new + ar1*(sheep.1941.new - sheep.1940.new) + ar2*(sheep.1940.new - 1797) + ar3*(1797 - 1791)
c(sheep.1940.new, sheep.1941.new, sheep.1942.new)
##      ar1      ar1      ar1 
## 1777.996 1718.869 1695.985
# Recomputing the forecasts with the fitted coefficients reproduces the
# Arima() output exactly, confirming the explanation above.

#17) Annual bituminous coal production in the United States from 1920 to 1968 is in data set bicoal.
# a. Plot the annual bituminous coal production in the United States from 1920 to 1968 (data set bicoal).
autoplot(bicoal)

# There doesn't appear to be any particular trend or seasonality.
# b. You decide to fit the following model to the series:
# yt = c + phi1*yt-1 + phi2*yt-2 + phi3*yt-3 + phi4*yt-4 + et
# where yt is the coal production in year t and et is a white noise series. What sort of ARIMA model is this (i.e., what are p, d, and q)?
# (1 - phi1*B - phi2*B^2 - phi3*B^3 - phi4*B^4)*yt = c + et
# If mu is the mean of yt, then
# c = mu*(1 - phi1 - phi2 - phi3 - phi4)
# This model is ARIMA(4, 0, 0), i.e. AR(4) with a constant.
# c. Explain why this model was chosen using the ACF and PACF.
ggAcf(bicoal, lag.max = 36)

ggPacf(bicoal, lag.max = 36)

# The ACF shows sinusoidally decaying autocorrelations. The PACF shows significant spikes at lags 1 and 4, but none beyond lag 4. Therefore an AR(4) model is an appropriate choice.
# d. The last five values of the series are given below.
# Year             1964 1965    1966    1967    1968
# Millions of tons  467  512     534     552     545
# The estimated parameters are c = 162.00, phi1 = 0.83, phi2 = -0.34, phi3 = 0.55 and phi4 = -0.38. Without using the forecast function, calculate forecasts for the next three years (1969-1971).
c = 162.00
phi1 = 0.83 
phi2 = -0.34
phi3 = 0.55
phi4 = -0.38
bicoal.1969 <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970 <- c + phi1*bicoal.1969 + phi2*545 + phi3*552 + phi4*534
bicoal.1971 <- c + phi1*bicoal.1970 + phi2*bicoal.1969 + phi3*545 + phi4*552
c(bicoal.1969, bicoal.1970, bicoal.1971)
## [1] 525.8100 513.8023 499.6705
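# The same idea as a reusable sketch for an AR(4) with a constant
# (ar4_forecast is a hypothetical helper name, not from the exercise):
ar4_forecast <- function(y, const, phi, h) {
 for (i in seq_len(h)) {
  n <- length(y)
  # next value = constant + weighted sum of the last four observations
  y <- c(y, const + sum(phi * y[n:(n-3)]))
 }
 tail(y, h)
}
ar4_forecast(c(467, 512, 534, 552, 545), const = 162,
             phi = c(0.83, -0.34, 0.55, -0.38), h = 3)
# should reproduce 525.8100 513.8023 499.6705 from above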
# e. Now fit the model in R and obtain the forecasts from the same model. How are they different from yours? Why?
fc_bicoal_ar4 <- forecast(ar(bicoal, 4), h = 3)
fc_bicoal_ar4$mean
## Time Series:
## Start = 1969 
## End = 1971 
## Frequency = 1 
## [1] 526.2057 514.0658 500.0111
# The forecasts from the ar() function are slightly larger than the
# hand-calculated ones, again because the fitted coefficients differ slightly
# from the rounded values given in the question.
phi1 <- fc_bicoal_ar4$model$ar[1]
phi2 <- fc_bicoal_ar4$model$ar[2]
phi3 <- fc_bicoal_ar4$model$ar[3]
phi4 <- fc_bicoal_ar4$model$ar[4]
c <- fc_bicoal_ar4$model$x.mean*(1 - phi1 - phi2 - phi3 - phi4)
bicoal.1969.new <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970.new <- c + phi1*bicoal.1969.new + phi2*545 + phi3*552 + phi4*534
bicoal.1971.new <- c + phi1*bicoal.1970.new + phi2*bicoal.1969.new + phi3*545 + phi4*552
c(bicoal.1969.new, bicoal.1970.new, bicoal.1971.new)
## [1] 526.2057 514.0658 500.0111
# Recomputing with the fitted coefficients reproduces the ar() forecasts
# exactly, confirming that the coefficient differences cause the discrepancy.

# 18)

# a. Install the Quandl package in R
  # install.packages("Quandl")
  library(Quandl)
## Warning: package 'Quandl' was built under R version 3.6.1
## Loading required package: xts
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# b. Select a time series from Quandl. Then copy its short URL and import the data using the Quandl() function.

oil.OPEC <- Quandl("OPEC/ORB", type= 'timeSeries')
str(oil.OPEC)
## Time Series:          
##  Name:               object
## Data Matrix:        
##  Dimension:          4278 1
##  Column Names:       TS.1
##  Row Names:          2019-08-01  ...  2003-01-02
## Positions:          
##  Start:              2003-01-02
##  End:                2019-08-01
## With:               
##  Format:             %Y-%m-%d
##  FinCenter:          GMT
##  Units:              TS.1
##  Title:              Time Series Object
##  Documentation:      Sun Aug 04 20:29:55 2019
head(oil.OPEC)
## GMT
##             TS.1
## 2019-08-01 63.54
## 2019-07-31 65.53
## 2019-07-30 64.62
## 2019-07-29 63.79
## 2019-07-26 64.02
## 2019-07-25 64.55
# c. Plot graphs of the data, and try to identify an appropriate ARIMA model.
plot(oil.OPEC)

ndiffs(oil.OPEC)
## [1] 1
ggtsdisplay(diff(oil.OPEC))
## Warning: Removed 1 rows containing missing values (geom_point).

# An ARIMA(1, 1, 0) model looks like a reasonable candidate for the data.
# d. Do residual diagnostic checking of your ARIMA model. Are the residuals white noise?
oil.OPEC_arima.1.1.0 <- Arima(
 oil.OPEC, order = c(1, 1, 0)
)
checkresiduals(oil.OPEC_arima.1.1.0)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 36.188, df = 9, p-value = 3.672e-05
## 
## Model df: 1.   Total lags used: 10
oil.OPEC_aArima <- auto.arima(oil.OPEC)
checkresiduals(oil.OPEC_aArima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,1,1)
## Q* = 13.369, df = 6, p-value = 0.03753
## 
## Model df: 4.   Total lags used: 10
# Strictly, neither model's residuals pass the Ljung-Box test at the 5% level,
# but the auto.arima residuals (p = 0.038) are much closer to white noise than
# those of ARIMA(1,1,0) (p < 0.001).
# auto.arima selects ARIMA(3,1,1), which also has a slightly lower AIC.
# e. Use your chosen ARIMA model to forecast the next four years.
fcast.OPEC.aArima <- forecast(
 oil.OPEC_aArima, h = 365*4
)
autoplot(fcast.OPEC.aArima)

# The point forecasts of the oil price increase, but the rate of increase is damped.
# f. Now try to identify an appropriate ETS model.
oil.OPEC_ets <- ets(oil.OPEC)
oil.OPEC_ets
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = oil.OPEC) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.1643 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 64.6782 
##     b = -0.2477 
## 
##   sigma:  1.0211
## 
##      AIC     AICc      BIC 
## 35954.79 35954.81 35992.96
# The chosen model is ETS(A, Ad, N).
# g. Do residual diagnostic checking of your ETS model. Are the residuals white noise?
checkresiduals(oil.OPEC_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,Ad,N)
## Q* = 172.05, df = 5, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 10
# The residuals are clearly not white noise (Ljung-Box p-value < 2.2e-16);
# the ETS model leaves substantial autocorrelation.
# h. Use your chosen ETS model to forecast the next four years.
fc_oil.OPEC_ets <- forecast(
 oil.OPEC_ets, h = 365*4
)
autoplot(fc_oil.OPEC_ets)

# i. Which of the two models do you prefer?
# I prefer the ARIMA model: its residuals are closer to white noise and its
# prediction intervals are narrower, so it is likely to give more accurate
# forecasts. A hold-out comparison is sketched below.
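# Since AIC is not comparable across ARIMA and ETS, a hold-out comparison is
# a fairer check of this preference. A minimal sketch, assuming as.ts()
# coerces the timeSeries object cleanly, holding out roughly the last year;
# the forecast:: prefix guards against accuracy() being masked by another
# loaded package (results not shown here):
oil.ts <- as.ts(oil.OPEC)
n <- length(oil.ts)
train <- subset(oil.ts, end = n - 365)
test <- subset(oil.ts, start = n - 364)
forecast::accuracy(forecast(Arima(train, order = c(3, 1, 1)), h = 365), test)
forecast::accuracy(forecast(ets(train), h = 365), test)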