#Chapter 7
library(fpp2) # assumed: the fpp2 package, which loads forecast and the textbook data sets used below
#1. Consider the pigs series — the number of pigs slaughtered in Victoria each month.
#a.
summary(ses(pigs, h=4))
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
#alpha = 0.2971
# l0 = 77260.0561
#b.
fc_pigs = ses(pigs, h=4)
sd_pigs = sd(fc_pigs$residuals)
mean_pigs = fc_pigs$mean[1]
int_pigs_fc <- c(mean_pigs - (1.96*sd_pigs),mean_pigs + (1.96*sd_pigs))
int_pigs_fc
## [1] 78679.97 118952.84
#The manually computed interval (point forecast +/- 1.96*sd of the residuals) is very close to the 95% interval produced by ses()
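# A sketch pulling the 95% bounds straight from the ses() forecast object for a side-by-side check:
c(fc_pigs$lower[1, "95%"], fc_pigs$upper[1, "95%"])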
#2. Write your own function to implement simple exponential smoothing. The function should take arguments y (the time series), alpha (the smoothing parameter α ) and level (the initial level ℓ0 ). It should return the forecast of the next observation in the series. Does it give the same forecast as ses()?
pig_ses = function(y, alpha, l0){
y_hat <- l0
for(index in 1:length(y)){
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
cat("SES forecast: ",
as.character(y_hat),
sep = "\n")
}
alpha <- fc_pigs$model$par[1]
l0 <- fc_pigs$model$par[2]
pig_ses(pigs,alpha = alpha, l0 = l0)
## SES forecast:
## 98816.4061115907
#The forecast matches the one produced by ses() above (98816.41), so the function appears to work
pig_ses_2 <- function(pars = c(alpha, l0), y){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(y)){
error <- y[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
return(SSE)
}
pig_optim = optim(par = c(0.5, pigs[1]), y = pigs, fn = pig_ses_2)
pig_optim
## $par
## [1] 2.990081e-01 7.637927e+04
##
## $value
## [1] 19767049111
##
## $counts
## function gradient
## 91 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
print(paste0("Created SES: ", pig_optim$par[1]))
## [1] "Created SES: 0.299008094014243"
print(paste0("Created SES: ", pig_optim$par[2]))
## [1] "Created SES: 76379.2653476235"
print(paste0("R's SES: ", fc_pigs$model$par[1]))
## [1] "R's SES: 0.297148833372095"
print(paste0("R's SES: ", fc_pigs$model$par[2]))
## [1] "R's SES: 77260.0561458528"
#The optimised values (alpha = 0.299, l0 = 76379) are close to those chosen by ses() (alpha = 0.297, l0 = 77260)
pig_ses_3 = function(init_pars, data){
fc_next <- 0
SSE <- function(pars, data){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(data)){
error <- data[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*data[index] + (1 - alpha)*y_hat
}
fc_next <<- y_hat
return(SSE)
}
# use optim function to get optimal values of alpha and l0.
optim_pars <- optim(par = init_pars, data = data, fn = SSE)
# return results
return(list(
Next_observation_forecast = fc_next,
alpha = optim_pars$par[1],
l0 = optim_pars$par[2]
))
}
pig_ses_3(c(0.5, pigs[1]), pigs)
## $Next_observation_forecast
## [1] 98814.63
##
## $alpha
## [1] 0.2990081
##
## $l0
## [1] 76379.27
print(paste0("R's SES: ", fc_pigs$model$par[1]))
## [1] "R's SES: 0.297148833372095"
print(paste0("R's SES: ", fc_pigs$model$par[2]))
## [1] "R's SES: 77260.0561458528"
#a.
autoplot(books)
#Both paperback and hardcover book sales trend upward over time; however, there is considerable day-to-day randomness in how much is sold
#b.
ses_paperback = ses(books[, "Paperback"], h = 4)
ses_hardcover = ses(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(ses_paperback, series = "Paperback") +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(ses_hardcover, series = "Hardcover", PI = FALSE)
#c.
paperback_RMSE = sqrt(mean(ses_paperback$residuals^2))
hardcover_RMSE = sqrt(mean(ses_hardcover$residuals^2))
summary(paperback_RMSE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 33.64 33.64 33.64 33.64 33.64 33.64
summary(hardcover_RMSE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.93 31.93 31.93 31.93 31.93 31.93
#a.
paper_holt <- holt(books[, "Paperback"], h = 4)
hard_holt <- holt(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"]) +
autolayer(paper_holt)
autoplot(books[, "Hardcover"]) +
autolayer(hard_holt)
#
#b.
rmse_paperback <- sqrt(mean(paper_holt$residuals^2))
rmse_hardcover <- sqrt(mean(hard_holt$residuals^2))
rmse_paperback
## [1] 31.13692
rmse_hardcover
## [1] 27.19358
#The RMSE for both paperback and hardcover is lower for Holt's method than for SES.
#SES tends to do better when a series has no clear trend, whereas Holt's method predicts more accurately when the data follow an approximately linear trend
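# A sketch backing this up with one-step cross-validation on the paperback series (output not shown):
sqrt(mean(tsCV(books[, "Paperback"], ses, h = 1)^2, na.rm = TRUE))
sqrt(mean(tsCV(books[, "Paperback"], holt, h = 1)^2, na.rm = TRUE))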
#c.
#I think Holt's method works better for these particular series: despite the noise, there is an upward trend that is more readily captured by a forecast using Holt's method, and the RMSE is also lower for both paperback and hardcover
#d.
print(paste0("Paperback 95% Interval, Upper: ",
paper_holt$upper[1, "95%"]))
## [1] "Paperback 95% Interval, Upper: 275.020545221845"
print(paste0("Paperback 95% Interval, Lower: ",
paper_holt$lower[1, "95%"]))
## [1] "Paperback 95% Interval, Lower: 143.912985820256"
print(paste0("Paperback 95% Interval, Upper: ",
paper_holt$mean[1] + 1.96*rmse_paperback))
## [1] "Paperback 95% Interval, Upper: 270.495134632871"
print(paste0("Paperback 95% Interval, Lower: ",
paper_holt$mean[1] - 1.96*rmse_paperback))
## [1] "Paperback 95% Interval, Lower: 148.438396409231"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$upper[1, "95%"]))
## [1] "Hardcover 95% Interval, Upper: 307.42557365277"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$lower[1, "95%"]))
## [1] "Hardcover 95% Interval, Upper: 192.922170771941"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$mean[1] + 1.96*rmse_hardcover))
## [1] "Hardcover 95% Interval, Upper: 303.473285056784"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$mean[1] - 1.96*rmse_hardcover))
## [1] "Hardcover 95% Interval, Upper: 196.874459367927"
autoplot(eggs)
#there is a very clear, downward trend in the data
holt_egg = holt(eggs, h = 100)
autoplot(holt_egg) +
autolayer(holt_egg$fitted)
holt_damp_egg = holt(eggs, damped = TRUE, h = 100)
autoplot(holt_damp_egg) +
autolayer(holt_damp_egg$fitted)
holt_Box_damp_egg = holt(
eggs,
damped = TRUE,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_Box_damp_egg) +
autolayer(holt_Box_damp_egg$fitted)
#Based on the created forecasts, each successive model does better at avoiding negative forecast values for something that cannot be negative, such as the price of eggs in this instance.
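# A quick numerical check (sketch; output not shown): training RMSE for each variant.
accuracy(holt_egg)["Training set", "RMSE"]
accuracy(holt_damp_egg)["Training set", "RMSE"]
accuracy(holt_Box_damp_egg)["Training set", "RMSE"]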
library(readxl)
retail <- read_excel("C:/Users/dmcsw/Downloads/retail.xlsx",
skip = 1)
ts_retail <- ts(retail[, "A3349873A"],
frequency = 12,
start = c(1982, 4))
autoplot(ts_retail)
#Multiplicative seasonality is necessary for this series because the seasonal swings grow in proportion to the level of sales; an additive model would understate them as sales increase
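# A season plot makes the growing seasonal swings easier to see (sketch):
ggseasonplot(ts_retail)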
#b.
hw_retail <- hw(ts_retail,
seasonal = "multiplicative")
hw_damp_retail <- hw(ts_retail,
seasonal = "multiplicative",
damped = TRUE)
autoplot(hw_retail)
autoplot(hw_damp_retail)
#It seems that there is a wider range of outcomes with the damped Holt-Winters method.
#c.
tscv_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative"
)
damp_tscv_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative", damped = TRUE
)
sqrt(mean(tscv_retail^2, na.rm = TRUE))
## [1] 14.72762
sqrt(mean(damp_tscv_retail^2, na.rm = TRUE))
## [1] 14.94306
#There is a minimal difference between the two RMSEs, but the damped model would be the better choice, as it is less susceptible to forecasting a negative value at any point
#d.
checkresiduals(damp_tscv_retail)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
#These are one-step cross-validation errors rather than model residuals, so checkresiduals() cannot work out the degrees of freedom; from the plots alone it is hard to tell whether they are white noise, so it is safer to assume that they are not
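# To test model residuals directly (rather than the tsCV errors), a sketch using the fitted damped model:
checkresiduals(hw_damp_retail)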
#e.
ts_retail_train = window(ts_retail,
end = c(2011))
ts_retail_test = window(ts_retail,
start = 2010)
# Holt-Winters' damped method.
damp_hw_retail = hw(ts_retail_train,
h = 36,
seasonal = "multiplicative",
damped = TRUE)
autoplot(damp_hw_retail)
damp_check = accuracy(damp_hw_retail, ts_retail_test)
# try Holt-Winters' method.
hw_retail = hw(ts_retail_train,
h = 36,
seasonal = "multiplicative")
autoplot(hw_retail)
hw_check = accuracy(hw_retail, ts_retail_test)
damp_check
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6850894 8.918343 6.398241 0.2648898 3.209221 0.399920
## Test set 65.1135689 86.678289 66.456844 15.9445237 16.448947 4.153864
## ACF1 Theil's U
## Training set -0.03930028 NA
## Test set 0.55630834 1.432892
hw_check
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4341199 9.043952 6.468265 0.1680194 3.230092 0.4042968
## Test set 59.0723813 79.772765 60.469975 14.4405377 14.965546 3.7796564
## ACF1 Theil's U
## Training set 0.04843008 NA
## Test set 0.49513754 1.32284
#The test-set RMSE and ACF1 suggest that neither model I tried was as accurate as I would have hoped, but of the 2, I would choose to use the damped model
stl_ets_train <- ts_retail_train %>%
stlm(
s.window = 13,
robust = TRUE,
method = "ets",
lambda = BoxCox.lambda(ts_retail_train)
) %>%
forecast(
h = 36,
lambda = BoxCox.lambda(ts_retail_train)
)
## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj information
## not found, defaulting to FALSE.
autoplot(stl_ets_train)
accuracy(stl_ets_train, ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6447266 8.848999 6.019668 -0.2989156 2.939973 0.3762574
## Test set 50.4143503 71.180518 52.207017 12.2159132 12.882611 3.2631828
## ACF1 Theil's U
## Training set -0.009661764 NA
## Test set 0.408707373 1.184903
#Comparing this approach to the previous models, it seems to be an improvement, even if only a slight one.
autoplot(ukcars)
#The data appears to have an overall upward trend, with some seasonality.
#b.
seas_ukcars = ukcars %>%
stl(s.window = 4, robust = TRUE) %>%
seasadj()
autoplot(seas_ukcars)
#c.
seas_fc_ukcars_damp = ukcars %>%
stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(seas_fc_ukcars_damp)
#d.
seas_fc_ukcars = ukcars %>%
stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(seas_fc_ukcars)
#e.
ets_ukcars = ets(ukcars)
summary(ets_ukcars)
## ETS(A,N,A)
##
## Call:
## ets(y = ukcars)
##
## Smoothing parameters:
## alpha = 0.6199
## gamma = 1e-04
##
## Initial states:
## l = 314.2568
## s = -1.7579 -44.9601 21.1956 25.5223
##
## sigma: 25.9302
##
## AIC AICc BIC
## 1277.752 1278.819 1296.844
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
#ETS(A,N,A) came up as the ets model
#f.
accuracy(seas_fc_ukcars_damp)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.040088 22.81499 17.73846 -0.1296296 5.821059 0.5780878
## ACF1
## Training set 0.01590553
accuracy(seas_fc_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4024211 22.86408 17.732 -0.5948215 5.836029 0.5778774
## ACF1
## Training set 0.02416541
#The damped model is the best overall, as both its ACF1 and its RMSE are lower
#g.
#I think the most reasonable forecast is the damped model, as the RMSE is lower than the others
#h.
checkresiduals(seas_fc_ukcars_damp)
## Warning in checkresiduals(seas_fc_ukcars_damp): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 23.825, df = 3, p-value = 2.717e-05
##
## Model df: 5. Total lags used: 8
#The Ljung-Box p-value is very small (2.7e-05), so the residuals are significantly autocorrelated; they are not the white noise we would like to see from a well specified model
autoplot(visitors)
ggseasonplot(visitors)
#b.
train_visitors = subset(visitors,
end = length(visitors) - 24)
test_visitors = subset(visitors,
start = length(visitors) - 23)
hwm_visit_train = hw(train_visitors,
h = 24,
seasonal = "multiplicative")
#c.
autoplot(hwm_visit_train)
#Multiplicative seasonality lets the seasonal effect grow as the number of visitors increases, whereas an additive model would not be able to account for that growing variation in a meaningful or representative way
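# For comparison, a sketch of the additive alternative (hypothetical object name hwa_visit_train):
hwa_visit_train = hw(train_visitors, h = 24, seasonal = "additive")
autoplot(hwa_visit_train)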
#d.
# d-1. an ETS model;
fc_ets_visit = forecast(ets(train_visitors), h = 24)
autoplot(fc_ets_visit)
# d-2. an additive ETS model applied to a Box-Cox transformed series;
fc_Box_visit = forecast(
ets(train_visitors,
lambda = BoxCox.lambda(train_visitors),
additive.only = TRUE),
h = 24
)
autoplot(fc_Box_visit)
# d-3. a seasonal naive method;
fc_snaive_visit = snaive(train_visitors, h = 24)
autoplot(fc_snaive_visit)
# d-4. an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
fc_Box_stl_visitors = train_visitors %>%
stlm(
lambda = BoxCox.lambda(train_visitors),
s.window = 13,
robust = TRUE,
method = "ets"
) %>%
forecast(h = 24)
autoplot(fc_Box_stl_visitors)
#e.
accuracy(fc_ets_visit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7640074 14.5348 10.57657 0.1048224 3.994788 0.405423 -0.05311217
accuracy(fc_Box_visit)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.001363 14.97096 10.82396 0.1609336 3.974215 0.4149057
## ACF1
## Training set -0.02535299
accuracy(fc_snaive_visit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 17.29363 31.15613 26.08775 7.192445 10.28596 1 0.6327669
accuracy(fc_Box_stl_visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5803348 13.36431 9.551391 0.08767744 3.5195 0.3661256
## ACF1
## Training set -0.05924203
#The STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data ended up performing the best of the group overall.
checkresiduals(fc_Box_stl_visitors)
## Warning in checkresiduals(fc_Box_stl_visitors): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,Ad,N)
## Q* = 15.032, df = 19, p-value = 0.7205
##
## Model df: 5. Total lags used: 24
#With a Ljung-Box p-value of 0.72, the residuals are consistent with white noise, so they pass the test
#f.
f_Box = function(y, h) {
forecast(ets(
y,
lambda = BoxCox.lambda(y),
additive.only = TRUE
),
h = h)
}
fstlm = function(y, h) {
forecast(stlm(
y,
lambda = BoxCox.lambda(y),
s.window = frequency(y) + 1,
robust = TRUE,
method = "ets"
),
h = h)
}
fets = function(y, h) {
forecast(ets(y),
h = h)
}
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, f_Box, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
#The tsCV comparison reaches the same conclusion: the STL + Box-Cox + ETS approach has the lowest one-step RMSE.
#a.
fets = function(y, h) {
forecast(ets(y), h = h)
}
qcement_tscv = tsCV(qcement, fets, h=4)
qcement_naive = tsCV(qcement, snaive, h= 4)
#b.
sqrt(mean(qcement_tscv^2, na.rm = TRUE))
## [1] 0.111846
sqrt(mean(qcement_naive^2, na.rm = TRUE))
## [1] 0.1338711
#Based on the RMSE values above, the ets model is more accurate than the seasonal naive model, which is what I would expect.
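# A sketch breaking the comparison down by horizon (the tsCV error matrices have one column per h; output not shown):
sqrt(colMeans(qcement_tscv^2, na.rm = TRUE))
sqrt(colMeans(qcement_naive^2, na.rm = TRUE))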
bricksq.BC = BoxCox(bricksq, BoxCox.lambda(bricksq))
a10.BC = BoxCox(a10, BoxCox.lambda(a10))
#validation sets
ausbeer.train = window(ausbeer, end = c(2007,2))
ausbeer.test = window(ausbeer, start = c(2007,3))
bricksq.BC.train = window(bricksq.BC, end = c(1991,1))
bricksq.BC.test = window(bricksq.BC, start = c(1991,2))
dole.train = window(dole, end=c(1989,7))
dole.test = window(dole, start=c(1989,8))
a10.train = window(a10.BC, end = c(2005,6))
a10.test = window(a10.BC, start = c(2005,7))
h02.train = window(h02, end = c(2005,7))
h02.test = window(h02, start = c(2005,8))
usmelec.train = window(usmelec, end = c(2010,6))
usmelec.test = window(usmelec, start = c(2010,7))
#ETS
ausbeer.ets = ets(ausbeer.train)
bricksq.BC.ets = ets(bricksq.BC.train)
dole.ets = ets(dole.train)
a10.ets = ets(a10.train)
h02.ets = ets(h02.train)
usmelec.ets = ets(usmelec.train)
#ETS forecasts
ausbeer.etsF = forecast(ausbeer.ets, h=36)
bricksq.BC.etsF = forecast(bricksq.BC.ets, h=36)
dole.etsF = forecast(dole.ets, h=36)
a10.etsF = forecast(a10.ets, h=36)
h02.etsF = forecast(h02.ets, h=36)
usmelec.etsF = forecast(usmelec.ets, h = 36)
#snaive
ausbeer.sn = snaive(ausbeer.train, h = 36)
bricksq.BC.sn = snaive(bricksq.BC.train, h= 36)
dole.sn = snaive(dole.train, h= 36)
a10.sn = snaive(a10.train, h = 36)
h02.sn = snaive(h02.train, h = 36)
usmelec.sn = snaive(usmelec.train, h= 36)
#stlf
ausbeer.stlf = stlf(ausbeer.train, h = 36)
bricksq.BC.stlf = stlf(bricksq.BC.train, h= 36)
dole.stlf = stlf(dole.train, h= 36)
a10.stlf = stlf(a10.train, h = 36)
h02.stlf = stlf(h02.train, h = 36)
usmelec.stlf = stlf(usmelec.train, h= 36)
accuracy(ausbeer.etsF,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
accuracy(ausbeer.sn,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -2.916667 10.80509 9.75000 -0.6505927 2.338012 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.3254581810 0.1981463
accuracy(ausbeer.stlf,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.08449615 13.640828 10.495557 -0.02319029 2.523657 0.6629464
## Test set -0.74491615 9.596294 9.014064 -0.20867707 2.159142 0.5693686
## ACF1 Theil's U
## Training set -0.1623773 NA
## Test set 0.3154543 0.1712144
accuracy(bricksq.BC.etsF,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set 0.25707641 0.3389809 0.2909121 1.7480916 1.983664 0.7172512
## ACF1 Theil's U
## Training set 0.1434416 NA
## Test set 0.5769625 0.919898
accuracy(bricksq.BC.sn,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1051226 0.5448411 0.4055930 0.7368813 2.873281 1.000000
## Test set -0.3259032 0.5522919 0.4649268 -2.2284771 3.205211 1.146289
## ACF1 Theil's U
## Training set 0.7777698 NA
## Test set -0.1277042 1.395018
accuracy(bricksq.BC.stlf,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0208919 0.2258669 0.1671388 0.150274 1.185157 0.4120850
## Test set 0.2002913 0.3047304 0.2627101 1.361419 1.796522 0.6477185
## ACF1 Theil's U
## Training set 0.2125626 NA
## Test set 0.4857060 0.8319484
accuracy(dole.etsF,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.9394267 11.29943
accuracy(dole.sn,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.9208325 9.479785
accuracy(dole.stlf,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 256.3644 6248.781 3836.768 0.5425694 4.804379 0.121906
## Test set 205359.4652 268646.638 208074.403 29.3215555 30.017034 6.611170
## ACF1 Theil's U
## Training set 0.01337877 NA
## Test set 0.93746400 10.71493
accuracy(a10.etsF,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162 0.3212884
## Test set 5.129718e-02 0.13739413 0.11893283 1.217786471 3.234877 0.7620674
## ACF1 Theil's U
## Training set -0.04890245 NA
## Test set -0.01633399 0.4906605
accuracy(a10.sn,a10.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1523955 0.1731097 0.1560660 6.682003 6.842931 1.000000 0.2212723
## Test set 0.3393671 0.4025493 0.3406072 8.979124 9.018149 2.182456 0.6073193
## Theil's U
## Training set NA
## Test set 1.423661
accuracy(a10.stlf,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001106891 0.05780298 0.04565029 -0.0493025 2.039699 0.2925062
## Test set 0.0557901445 0.13819615 0.11837298 1.3422799 3.212405 0.7584802
## ACF1 Theil's U
## Training set -0.09571412 NA
## Test set 0.04878539 0.4893801
accuracy(h02.etsF,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0342751950 0.08019185 0.06763825 2.8172154 7.267771 1.1341883
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.12581913 0.4581788
accuracy(h02.sn,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.037749040 0.07171709 0.05963581 5.0942482 8.197262 1.000000
## Test set -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182 1.138231
## ACF1 Theil's U
## Training set 0.40080565 NA
## Test set 0.09840611 0.4929215
accuracy(h02.stlf,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.000830773 0.04053338 0.03033702 -0.2685748 4.274993 0.5087047
## Test set -0.006211506 0.07075988 0.05886616 -2.0093569 6.971515 0.9870941
## ACF1 Theil's U
## Training set -0.06958533 NA
## Test set -0.06144624 0.4425916
accuracy(usmelec.etsF,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4679496 0.4859495
accuracy(usmelec.sn,usmelec.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set 5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
## Theil's U
## Training set NA
## Test set 0.4765145
accuracy(usmelec.stlf,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0657726 6.19216 4.682101 -0.06613967 1.826421 0.5203366
## Test set -13.0124627 17.48270 15.358336 -4.06887002 4.655355 1.7068199
## ACF1 Theil's U
## Training set 0.1106876 NA
## Test set 0.5244502 0.5593233
#The best conclusion that I can draw from all of these results is that the best model depends on the kind of data: whether or not it is seasonal, and how much variance there is year to year.
autoplot(forecast(ets(bicoal)))
#The ETS forecast does not appear all that useful
autoplot(forecast(ets(chicken)))
#The ETS forecast is not the best, but it is workable in this instance
autoplot(forecast(ets(dole)))
#The ETS forecast was good, as it reflected the increasing trend and seasonality.
autoplot(forecast(ets(usdeaths)))
#The ETS was good, as it reflected the seasonality of the data.
autoplot(forecast(ets(lynx)))
#The ETS forecast is solid, but not great; the wide range of outcomes is not reflective of the data so far
autoplot(forecast(ets(ibmclose)))
#The ETS appears okay at first glance, however, it does not seem like it will be helpful going forward
autoplot(forecast(ets(eggs)))
#The ETS is not as useful as it would initially appear to be, the decreasing trend is not well accounted for.
#b.
#The ibmclose dataset had issues with the ets forecast, as the large fluctuations in price that existed historically were not accounted for in the forecast.
usdeaths_ets = ets(usdeaths,model = "MAM")
usdeaths_ets2 = forecast(usdeaths_ets, h=4)
usdeaths_hwM = hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths_ets2$mean
## Jan Feb Mar Apr
## 1979 8233.107 7475.624 8285.738 8509.419
usdeaths_hwM$mean
## Jan
## 1979 8217.64
#The results appear to be very similar
usdeaths_ets_ANN = ets(usdeaths, model = "ANN")
usdeaths_ets_F = forecast(usdeaths_ets_ANN,4)
usdeaths_ets_F
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1979 9239.938 8296.855 10183.02 7797.618 10682.26
## Feb 1979 9239.938 7906.286 10573.59 7200.294 11279.58
## Mar 1979 9239.938 7606.583 10873.29 6741.938 11737.94
## Apr 1979 9239.938 7353.919 11125.96 6355.521 12124.35
sigma = 7.2637
alpha = .9999^2
h = 4
ci = sigma*(1+alpha*(h-1))
usdeaths_ets_F$mean[1]
## [1] 9239.938
usdeaths_ets_F$mean[1] - ci
## [1] 9210.887
usdeaths_ets_F$lower[1,'95%']
## 95%
## 7797.618
# The manually computed interval above is much narrower than R's because sigma was not taken from the fitted model; for ETS(A,N,N) the h-step forecast standard deviation is sigma*sqrt(1 + alpha^2*(h - 1))
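# A sketch of that relation using the fitted model (assumption: sigma is approximated from the
# model residuals); with h = 1 it should roughly reproduce the 95% bounds printed above.
alpha_hat = usdeaths_ets_ANN$par["alpha"]
sigma_hat = sqrt(mean(residuals(usdeaths_ets_ANN)^2))
h = 1
usdeaths_ets_F$mean[1] + c(-1.96, 1.96) * sigma_hat * sqrt(1 + alpha_hat^2 * (h - 1))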
#Chapter 8
#1.
#a.
#In all 3 figures the autocorrelations stay within the dashed critical bounds, indicating that all three series look like white noise.
#b.
#The critical bounds are +/- 1.96/sqrt(T), so they narrow as the series gets longer; with more data, spurious autocorrelations of a given size are less likely to appear by accident.
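# A sketch of those bounds for the three series lengths in the figure (assumed to be 36, 360 and 1000 observations):
1.96 / sqrt(c(36, 360, 1000))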
#2.
autoplot(ibmclose)
ggtsdisplay(ibmclose)
#This data is certainly not stationary: the ACF stays well above zero and decays very slowly, and the PACF starts with a single large spike at lag 1, both indicative of non-stationary data.
ggtsdisplay(usnetelec)
#The data is not seasonal; a single first difference is needed, as suggested by the ACF/PACF
diff_usnetelec = diff(usnetelec, 1)
ggtsdisplay(diff_usnetelec)
#b.
ggtsdisplay(usgdp)
bcl_usgdp = BoxCox.lambda(usgdp)
bcl_diff_usgdp = usgdp %>%
BoxCox(bcl_usgdp) %>%
diff(1)
ggtsdisplay(bcl_diff_usgdp)
#c.
ggtsdisplay(mcopper)
bcl_mcop = BoxCox.lambda(mcopper)
bcl_diff_mcop = mcopper %>%
BoxCox(bcl_mcop) %>%
diff(1)
ggtsdisplay(bcl_diff_mcop)
#d.
ggtsdisplay(enplanements)
bcl_empl = BoxCox.lambda(enplanements)
bcl_diff_empl = enplanements %>%
BoxCox(bcl_empl) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_empl)
#e.
ggtsdisplay(visitors)
bcl_visit = BoxCox.lambda(visitors)
bcl_diff_visit = visitors %>%
BoxCox(bcl_visit) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_visit)
#The differencing applied above, in backshift notation: w_t = (1 - B)(1 - B^12) * BoxCox(y_t, lambda)
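# A sketch confirming the suggested differencing orders for the Box-Cox transformed series (output not shown):
visitors %>% BoxCox(bcl_visit) %>% nsdiffs()
visitors %>% BoxCox(bcl_visit) %>% diff(12) %>% ndiffs()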
autoplot(ts_retail)
ggtsdisplay(ts_retail)
bcl_retail = BoxCox.lambda(ts_retail)
# seasonal difference followed by a first difference, as for the other monthly series above
bcl_diff_retail <- ts_retail %>%
BoxCox(bcl_retail) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_retail)
#a.
phi1 = 0.6
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = phi1*y[i-1] + e[i]
}
#b.
func_ar1 = function(phi1){
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = phi1*y[i-1] + e[i]
}
return(y)
}
autoplot(func_ar1(0.3), series = "0.3") +
geom_line(size = 1, colour = "blue") +
autolayer(y, series = "0.6", size = 1) +
autolayer(func_ar1(0.9), size = 1, series = "0.9") +
ylab("AR1 model") +
guides(colour = guide_legend(title = "Phi1"))
#c.
theta1 = 0.6
func_ma1 = function(theta1){
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = theta1*e[i-1] + e[i]
}
return(y)
}
#d.
autoplot(func_ma1(0.3), series = "0.3") +
geom_line(size = 1, colour = "green") +
autolayer(func_ma1(0.6), series = "0.6", size = 1) +
autolayer(func_ma1(0.9), size = 1, series = "0.9") +
ylab("MA(1) model") +
guides(colour = guide_legend(title = "Theta1"))
#e.
arima_101 = ts(numeric(50))
e = rnorm(50)
for(i in 2:50){
arima_101[i] = 0.6*arima_101[i-1] + 0.6*e[i-1] + e[i]
}
#f.
func_ar2 = ts(numeric(50))
e = rnorm(50)
for(i in 3:50){
func_ar2[i] = -0.8*func_ar2[i-1] + 0.3*func_ar2[i-2] + e[i]
}
#g.
autoplot(arima_101, series = "ARMA (1,1)") +
autolayer(func_ar2, series = "AR (2)") +
ylab("y") +
guides(colour = guide_legend(title = "Models"))
#The AR(2) series oscillates above and below 0 with rapidly growing amplitude (it is not stationary), whereas the ARMA(1,1) series looks more like data we would expect to see in the real world.
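# Sketch: a stationarity check for the simulated AR(2) with phi1 = -0.8, phi2 = 0.3.
# All roots of 1 - phi1*z - phi2*z^2 must lie outside the unit circle; here one root
# has modulus below 1, which is why the series oscillates with growing amplitude.
abs(polyroot(c(1, 0.8, -0.3)))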
#a.
autoplot(wmurders)
autoplot(diff(wmurders))
ndiffs(wmurders)
## [1] 2
autoplot(diff(wmurders, differences = 2))
ggtsdisplay((diff(wmurders, differences = 2)))
#The appropriate ARIMA model can be ARIMA(1,2,1).
#b.
#The data do not appear to be seasonal, and with the series twice-differenced, including a constant would introduce a quadratic trend into the forecasts, which could cause problems later on
#c.
#(1 - phi1*B)(1 - B)^2 * y_t = (1 + theta1*B) * e_t
#d.
wm_fit = Arima(wmurders, order=c(1,2,1))
checkresiduals(wm_fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 12.419, df = 8, p-value = 0.1335
##
## Model df: 2. Total lags used: 10
#Based on the residuals being mostly normally distributed, and all the values on the ACF being within the critical values, this model appears to be a solid one.
#e.
wm_fit_fc = forecast(wm_fit, h=3)
wm_fit_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 2.470660 2.194836 2.746484 2.048824 2.892496
## 2006 2.363106 1.986351 2.739862 1.786908 2.939304
## 2007 2.252833 1.765391 2.740276 1.507354 2.998313
#f.
autoplot(wm_fit_fc)
#g.
auto_wm_fit = auto.arima(wmurders)
auto_wm_fit
## Series: wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.2434 -0.8261
## s.e. 0.1553 0.1143
##
## sigma^2 estimated as 0.04632: log likelihood=6.44
## AIC=-6.88 AICc=-6.39 BIC=-0.97
auto_wm_fit_fc = forecast(auto_wm_fit, h=3)
autoplot(auto_wm_fit_fc)
#auto.arima gives the same ARIMA(1,2,1) model as the one I chose
#a.
austa_arima = auto.arima(austa)
austa_arima
## Series: austa
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.3006 0.1735
## s.e. 0.1647 0.0390
##
## sigma^2 estimated as 0.03376: log likelihood=10.62
## AIC=-15.24 AICc=-14.46 BIC=-10.57
#ARIMA(0,1,1) with drift was the model selected
fc_austa = forecast(austa_arima, h=10)
autoplot(fc_austa)
#b.
#ARIMA(0,1,1) is also the model from part a
#c.
arima_213 = arima(austa,order = c(2,1,3),method="ML")
arima_213
##
## Call:
## arima(x = austa, order = c(2, 1, 3), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## 0.0004 0.9996 0.4633 -0.9893 -0.4625
## s.e. 0.0031 0.0031 0.2283 0.0329 0.2261
##
## sigma^2 estimated as 0.03059: log likelihood = 9.24, aic = -6.48
arima_213 %>%
forecast(h=10) %>%
autoplot()
austa3<-austa-mean(austa)
arima_213_nc<-arima(austa3,order = c(2,1,3),method="ML")
arima_213_nc
##
## Call:
## arima(x = austa3, order = c(2, 1, 3), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## 0.0006 0.9994 0.4534 -0.9878 -0.4538
## s.e. 0.0053 0.0053 0.2302 0.0371 0.2254
##
## sigma^2 estimated as 0.03069: log likelihood = 9.23, aic = -6.47
arima_213_nc %>%
forecast(h=10) %>%
autoplot()
#d.
arima_001 = arima(austa,order = c(0,0,1),method="ML")
arima_001
##
## Call:
## arima(x = austa, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 1.0000 3.5515
## s.e. 0.0907 0.3090
##
## sigma^2 estimated as 0.8827: log likelihood = -50.64, aic = 107.28
arima_001 %>%
forecast(h=10) %>%
autoplot()
arima_000 = arima(austa,order = c(0,0,0),method="ML")
arima_000
##
## Call:
## arima(x = austa, order = c(0, 0, 0), method = "ML")
##
## Coefficients:
## intercept
## 3.5414
## s.e. 0.2972
##
## sigma^2 estimated as 3.179: log likelihood = -71.9, aic = 147.8
arima_000 %>%
forecast(h=10) %>%
autoplot()
#e.
arima_021 = arima(austa,order = c(0,2,1),method="ML")
arima_021
##
## Call:
## arima(x = austa, order = c(0, 2, 1), method = "ML")
##
## Coefficients:
## ma1
## -0.7262
## s.e. 0.2277
##
## sigma^2 estimated as 0.03853: log likelihood = 6.74, aic = -9.48
arima_021 %>%
forecast(h=10) %>%
autoplot()
#a
autoplot(usgdp)
bcl_usgdp = BoxCox.lambda(usgdp)
ndiffs(usgdp)
## [1] 2
bcl_diff_usgdp = usgdp %>%
BoxCox(bcl_usgdp) %>%
diff(differences = 2) # two first differences, as suggested by ndiffs()
ggtsdisplay(bcl_diff_usgdp)
#b.
auto_gdp = auto.arima(usgdp)
auto_gdp
## Series: usgdp
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1228 0.3106 -0.5835 -0.3669
## s.e. 0.2869 0.0872 0.3004 0.2862
##
## sigma^2 estimated as 1604: log likelihood=-1199.57
## AIC=2409.13 AICc=2409.39 BIC=2426.43
auto_gdp %>%
forecast(h=20) %>%
autoplot()
#c.
arima_001_gdp = arima(usgdp,order = c(0,0,1),method="ML")
arima_001_gdp
##
## Call:
## arima(x = usgdp, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 1.0000 5173.1228
## s.e. 0.0113 180.8189
##
## sigma^2 estimated as 1945425: log likelihood = -2055.02, aic = 4116.04
arima_001_gdp %>%
forecast(h=8) %>%
autoplot()
arima_121_gdp = arima(usgdp,order = c(1,2,1),method="ML")
arima_121_gdp
##
## Call:
## arima(x = usgdp, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.2899 -0.9555
## s.e. 0.0668 0.0197
##
## sigma^2 estimated as 1651: log likelihood = -1204.96, aic = 2415.92
arima_121_gdp %>%
forecast(h=8) %>%
autoplot()
arima_000_gdp = arima(usgdp,order = c(0,0,0),method="ML")
arima_000_gdp
##
## Call:
## arima(x = usgdp, order = c(0, 0, 0), method = "ML")
##
## Coefficients:
## intercept
## 5168.4692
## s.e. 179.4869
##
## sigma^2 estimated as 7635091: log likelihood = -2214.31, aic = 4432.62
arima_000_gdp %>%
forecast(h=8) %>%
autoplot()
#d.
#I would be inclined to go with the auto.arima model, as it appears to follow the recent trend of the data more closely than the others
checkresiduals(auto_gdp)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 8.6247, df = 4, p-value = 0.0712
##
## Model df: 4. Total lags used: 8
#e.
#The forecast was already created as a part of b, and I would say the forecast looks reasonable.
auto_gdp %>%
forecast(h=20) %>%
autoplot()
#f.
ets_usgdp = ets(usgdp)
ets_usgdp %>%
forecast(h=20) %>%
autoplot()
autoplot(austourists)
tsdisplay(austourists)
#There appears to be a slight upward trend to the data as well as a high level of seasonality
#b.
#The ACF and PACF show that the lagged observations have a substantial amount of autocorrelation, particularly at the seasonal lags
#c.
#The seasonality of the data needs to be accounted for when we are trying to forecast, if we want to ensure some level of accuracy
#d.
dec_tour = decompose(austourists, "multiplicative")
adj_tour = austourists / dec_tour$seasonal
diff_adj_tour = diff(adj_tour)
tsdisplay(diff_adj_tour)
#I believe this would suggest a (1,0,0) model since we are looking for a single lag period
#e.
auto_tour = auto.arima(austourists)
auto_tour %>%
forecast(h=4) %>%
autoplot()
#It looks like the auto arima gives the same model.
#f.
#(1 - B^4) * y_t = B * y_(t-1) + e_t
autoplot(usmelec)
movAvg <- ma(usmelec, order=12)
autoplot(movAvg)
#b.
#The data will likely need transforming, as the seasonal fluctuations grow along with the long, upward trend in electricity consumption over time
bcl_elec = BoxCox.lambda(movAvg)
ndiffs(movAvg)
## [1] 1
bcl_diff_elec = movAvg %>%
BoxCox(bcl_elec) %>%
diff(1)
ggtsdisplay(bcl_diff_elec)
## Warning: Removed 12 rows containing missing values (geom_point).
#c.
#The data appears to be seasonal and therefore not stationary
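# A sketch checking how much seasonal and first differencing the raw series needs (output not shown):
nsdiffs(usmelec)
usmelec %>% diff(12) %>% ndiffs()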
#d.
arima_001_elec = arima(usmelec,order = c(0,0,1),method="ML")
arima_001_elec
##
## Call:
## arima(x = usmelec, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 0.8483 259.5482
## s.e. 0.0156 3.4307
##
## sigma^2 estimated as 1678: log likelihood = -2494.55, aic = 4995.09
arima_001_elec %>%
forecast(h=12) %>%
autoplot()
arima_121_elec = arima(usmelec,order = c(1,2,1),method="ML")
arima_121_elec
##
## Call:
## arima(x = usmelec, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.1334 -1.0000
## s.e. 0.0452 0.0052
##
## sigma^2 estimated as 561.8: log likelihood = -2221.87, aic = 4449.73
arima_121_elec %>%
forecast(h=12) %>%
autoplot()
arima_611_elec = arima(usmelec, order = c(6,1,1), method="ML")
arima_611_elec
##
## Call:
## arima(x = usmelec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.4736 -0.2502 -0.5709 -0.5812 -0.0011 -0.1411 0.4707
## s.e. 0.0852 0.0500 0.0443 0.0520 0.0559 0.0500 0.0753
##
## sigma^2 estimated as 316.6: log likelihood = -2085.49, aic = 4186.98
arima_611_elec %>%
forecast(h=12) %>%
autoplot()
#e.
checkresiduals(arima_611_elec)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,1)
## Q* = 749.39, df = 17, p-value < 2.2e-16
##
## Model df: 7. Total lags used: 24
#The residuals look roughly normally distributed, but the Ljung-Box p-value is essentially zero and the ACF chart still shows large spikes, so there is still much room for improvement
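# A seasonal ARIMA would likely absorb the remaining seasonal autocorrelation; a sketch with a hypothetical, untuned specification:
sarima_elec = Arima(usmelec, order = c(1,1,1), seasonal = c(0,1,1))
checkresiduals(sarima_elec)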
#f.
arima_611_elec = arima(usmelec, order = c(6,1,1), method="ML")
arima_611_elec
##
## Call:
## arima(x = usmelec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.4736 -0.2502 -0.5709 -0.5812 -0.0011 -0.1411 0.4707
## s.e. 0.0852 0.0500 0.0443 0.0520 0.0559 0.0500 0.0753
##
## sigma^2 estimated as 316.6: log likelihood = -2085.49, aic = 4186.98
arima_611_elec %>%
forecast(h=180) %>%
autoplot()
#Based on page 19 of the pdf showing electrical consumption, there was indeed a leveling off of electricity use, so the model appears to be somewhat accurate.
#a.
autoplot(mcopper)
bcl_mcop = BoxCox.lambda(mcopper)
ndiffs(mcopper)
## [1] 1
bcl_diff_mcop = mcopper %>%
BoxCox(bcl_mcop) %>%
diff(1)
ggtsdisplay(bcl_diff_mcop)
#b.
auto_mcop = auto.arima(mcopper)
auto_mcop
## Series: mcopper
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.2900
## s.e. 0.0419
##
## sigma^2 estimated as 6026: log likelihood=-3248.53
## AIC=6501.07 AICc=6501.09 BIC=6509.73
auto_mcop %>%
forecast(h=12) %>%
autoplot()
#c.
arima_001_mcop = arima(mcopper,order = c(0,0,1),method="ML")
arima_001_mcop
##
## Call:
## arima(x = mcopper, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 0.9451 998.6773
## s.e. 0.0087 26.3482
##
## sigma^2 estimated as 103672: log likelihood = -4058.21, aic = 8122.43
arima_001_mcop %>%
forecast(h=12) %>%
autoplot()
arima_221_mcop = arima(mcopper,order = c(2,2,1),method="ML")
arima_221_mcop
##
## Call:
## arima(x = mcopper, order = c(2, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1
## 0.2803 -0.0505 -0.9980
## s.e. 0.0427 0.0435 0.0152
##
## sigma^2 estimated as 6030: log likelihood = -3245.88, aic = 6499.76
arima_221_mcop %>%
forecast(h=12) %>%
autoplot()
arima_121_mcop = arima(mcopper,order = c(1,2,1),method="ML")
arima_121_mcop
##
## Call:
## arima(x = mcopper, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.2684 -1.0000
## s.e. 0.0410 0.0139
##
## sigma^2 estimated as 6036: log likelihood = -3246.57, aic = 6499.14
arima_121_mcop %>%
forecast(h=12) %>%
autoplot()
#d.
#I would use the auto.arima model, as there is a higher likelihood of a better forecast.
checkresiduals(auto_mcop)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 30.716, df = 23, p-value = 0.1299
##
## Model df: 1. Total lags used: 24
#e.
auto_mcop %>%
forecast(h=24) %>%
autoplot()
#The range of outcomes in the forecast appears reasonable, but I would not expect the forecast to level off
#f.
fc_mcop_ets = forecast(ets(mcopper), h=24)
autoplot(fc_mcop_ets)
#The ets model looks like it accounts for the recent, downward trend in the data better than the auto-arima model
autoplot(qauselec)
bcl_qauselec <- BoxCox.lambda(qauselec)
#A Box-Cox transformation is helpful because the seasonal variation grows with the level of the series
#b.
#The data is not stationary; as noted above, it has both an upward trend and strong seasonality
nsdiffs(qauselec)
## [1] 1
ndiffs(qauselec)
## [1] 1
#c.
arima_401_qaus = arima(qauselec,order = c(4,0,1),method="ML")
## Warning in arima(qauselec, order = c(4, 0, 1), method = "ML"): possible
## convergence problem: optim gave code = 1
arima_401_qaus
##
## Call:
## arima(x = qauselec, order = c(4, 0, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 intercept
## 0.0530 0.1289 0.0105 0.8070 0.5517 30.8648
## s.e. 0.0451 0.0417 0.0395 0.0382 0.0670 26.4027
##
## sigma^2 estimated as 1.104: log likelihood = -325.01, aic = 664.02
arima_401_qaus %>%
forecast(h=12) %>%
autoplot()
arima_611_qaus = arima(qauselec,order = c(6,1,1),method="ML")
arima_611_qaus
##
## Call:
## arima(x = qauselec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.3429 -0.1216 -0.1308 0.7239 0.1843 -0.0621 -0.0666
## s.e. 0.3883 0.1757 0.0861 0.0720 0.2853 0.1176 0.3831
##
## sigma^2 estimated as 0.7542: log likelihood = -279.64, aic = 575.29
arima_611_qaus %>%
forecast(h=12) %>%
autoplot()
arima_610_qaus = arima(qauselec,order = c(6,1,0),method="ML")
arima_610_qaus
##
## Call:
## arima(x = qauselec, order = c(6, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6
## -0.4097 -0.1507 -0.1428 0.7151 0.2330 -0.0439
## s.e. 0.0679 0.0714 0.0522 0.0518 0.0712 0.0678
##
## sigma^2 estimated as 0.7543: log likelihood = -279.66, aic = 573.32
arima_610_qaus %>%
forecast(h=12) %>%
autoplot()
arima_410_qaus = arima(qauselec,order = c(4,1,0), seasonal = c(0,1,2) ,method="ML")
arima_410_qaus
##
## Call:
## arima(x = qauselec, order = c(4, 1, 0), seasonal = c(0, 1, 2), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 sma1 sma2
## -0.4774 -0.3657 -0.2715 0.3116 -1.3807 0.5440
## s.e. 0.0766 0.0747 0.0750 0.1627 0.1818 0.1386
##
## sigma^2 estimated as 0.5112: log likelihood = -233.26, aic = 480.51
arima_410_qaus %>%
forecast(h=12) %>%
autoplot()
auto_quas = auto.arima(qauselec)
auto_quas
## Series: qauselec
## ARIMA(1,0,1)(1,1,2)[4] with drift
##
## Coefficients:
## ar1 ma1 sar1 sma1 sma2 drift
## 0.9158 -0.4411 0.8628 -1.6921 0.7737 0.2299
## s.e. 0.0467 0.0942 0.1420 0.1425 0.0954 0.0456
##
## sigma^2 estimated as 0.5082: log likelihood=-230.58
## AIC=475.16 AICc=475.71 BIC=498.72
auto_quas %>%
forecast(h=12) %>%
autoplot()
#d.
checkresiduals(auto_quas)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(1,1,2)[4] with drift
## Q* = 14.822, df = 3, p-value = 0.001975
##
## Model df: 6. Total lags used: 9
#The residuals do not appear to be white noise
checkresiduals(arima_401_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,1) with non-zero mean
## Q* = 47.525, df = 3, p-value = 2.688e-10
##
## Model df: 6. Total lags used: 9
checkresiduals(arima_611_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,1)
## Q* = 34.856, df = 3, p-value = 1.307e-07
##
## Model df: 7. Total lags used: 10
checkresiduals(arima_610_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,0)
## Q* = 29.737, df = 3, p-value = 1.567e-06
##
## Model df: 6. Total lags used: 9
checkresiduals(arima_410_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,1,0)(0,1,2)[4]
## Q* = 17.275, df = 3, p-value = 0.0006205
##
## Model df: 6. Total lags used: 9
#e.
arima_410_qaus %>%
forecast(h=24) %>%
autoplot()
#f.
fc_qaus_ets = forecast(
ets(qauselec), h = 24
)
autoplot(fc_qaus_ets)
fc_stlf_quas = stlf(qauselec, lambda = BoxCox.lambda(qauselec), s.window = 5, robust = TRUE,
method = "arima", h = 24)
autoplot(fc_stlf_quas)
checkresiduals(fc_stlf_quas)
## Warning in checkresiduals(fc_stlf_quas): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ARIMA(0,1,1) with drift
## Q* = 8.0648, df = 6, p-value = 0.2334
##
## Model df: 2. Total lags used: 8
#Based on the checkresiduals results, I would say that the stlf is the best way to forecast this particular dataset.
ts_arima_retail = auto.arima(ts_retail)
ts_arima_retail
## Series: ts_retail
## ARIMA(1,0,2)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 drift
## 0.9328 -0.2745 -0.1676 -0.2989 0.9843
## s.e. 0.0267 0.0608 0.0553 0.0551 0.3479
##
## sigma^2 estimated as 198.4: log likelihood=-1498.1
## AIC=3008.21 AICc=3008.44 BIC=3031.67
#b.
ts_retail %>%
forecast(h=36) %>%
autoplot()
damp_hw_retail %>%
forecast(h=36) %>%
autoplot()
#c.
library(readxl)
X8501011 <- read_excel("C:/Users/dmcsw/Downloads/8501011.xlsx",
sheet = "Data1", skip = 9)
retail_new = X8501011
retail_new_ts = ts(retail_new[, "A3349873A"],
start = c(1982, 4),
frequency = 12)
retail_new_test = subset(
retail_new_ts,
start = length(ts_retail) + 1
)
autoplot(retail_new_test) +
autolayer(forecast(ts_arima_retail, h=36))
#The forecasts appear to capture the overall trend, but not the actual observed values
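# A numerical check against the new data (sketch; accuracy values not shown here):
accuracy(forecast(ts_arima_retail, h = 36), retail_new_test)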
#a.
ts_sheep = ts(sheep)
autoplot(ts_sheep)
#b.
# (yt - yt-1) - phi1(yt-1 - yt-2) - phi2(yt-2 - yt-3) - phi3(yt-3 - yt-4) = et
# (1 - B)yt - phi1*B(1- B)yt - phi2*B^2(1- B)yt - phi3*B^3(1- B)yt = et
# (1 - phi1*B - phi2*B^2 - phi3*B^3)(1 - B)yt = et
# It is ARIMA(3, 1, 0) model.
#c.
arima_310_sheep = arima(sheep,order = c(3,1,0),method="ML")
arima_310_sheep
##
## Call:
## arima(x = sheep, order = c(3, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3
## 0.4210 -0.2018 -0.3043
## s.e. 0.1193 0.1363 0.1243
##
## sigma^2 estimated as 4783: log likelihood = -407.56, aic = 823.12
arima_310_sheep %>%
forecast(h=12) %>%
autoplot()
checkresiduals(arima_310_sheep)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,0)
## Q* = 3.4032, df = 7, p-value = 0.8454
##
## Model df: 3. Total lags used: 10
#d.
sheep.1940 = 1797 + 0.42*(1797 - 1791) -0.20*(1791 - 1627) - 0.30*(1627 - 1665)
sheep.1941 = sheep.1940 + 0.42*(sheep.1940 - 1797) -0.20*(1797 - 1791) - 0.30*(1791 - 1627)
sheep.1942 = sheep.1941 + 0.42*(sheep.1941 - sheep.1940) -0.20*(sheep.1940 - 1797) - 0.30*(1797 - 1791)
c(sheep.1940, sheep.1941, sheep.1942)
## [1] 1778.120 1719.790 1697.268
#e.
arima_310_sheep = arima(sheep,order = c(3,1,0),method="ML")
arima_310_sheep
##
## Call:
## arima(x = sheep, order = c(3, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3
## 0.4210 -0.2018 -0.3043
## s.e. 0.1193 0.1363 0.1243
##
## sigma^2 estimated as 4783: log likelihood = -407.56, aic = 823.12
arima_310_sheep %>%
forecast(h=12) %>%
autoplot()
#The fitted coefficients differ slightly from the rounded values used in part d, so the forecasts from the fitted model differ slightly from the hand-calculated projections
ts_bicoal = ts(bicoal)
autoplot(ts_bicoal)
#b.
#(1 - phi1*B - phi2*B^2 - phi3*B^3 - phi4*B^4)*yt = c + et
# where c = mu*(1 - phi1 - phi2 - phi3 - phi4) and mu is the series mean
# This model is ARIMA(4, 0, 0).
#c.
arima_400_coal = arima(bicoal,order = c(4,0,0),method="ML")
arima_400_coal
##
## Call:
## arima(x = bicoal, order = c(4, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 0.8334 -0.3444 0.5525 -0.3780 481.5106
## s.e. 0.1366 0.1752 0.1733 0.1414 21.0569
##
## sigma^2 estimated as 2509: log likelihood = -262.05, aic = 536.1
arima_400_coal %>%
forecast(h=12) %>%
autoplot()
ggAcf(bicoal, lag.max = 36)
ggPacf(bicoal, lag.max = 36)
checkresiduals(arima_400_coal)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 4.8533, df = 5, p-value = 0.434
##
## Model df: 5. Total lags used: 10
#In the ACF plot the values largely stay within the critical bounds after the first 3 lags, and the PACF cuts off after lag 4, which supports the AR(4) model.
#d.
c = 162.00
phi1 = 0.83
phi2 = -0.34
phi3 = 0.55
phi4 = -0.38
bicoal.1969 <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970 <- c + phi1*bicoal.1969 + phi2*545 + phi3*552 + phi4*534
bicoal.1971 <- c + phi1*bicoal.1970 + phi2*bicoal.1969 + phi3*545 + phi4*552
c(bicoal.1969, bicoal.1970, bicoal.1971)
## [1] 525.8100 513.8023 499.6705
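# Sketch: the constant c used above can be recovered from the fitted intercept (the series mean)
# and the AR coefficients, since c = mu*(1 - phi1 - phi2 - phi3 - phi4); this gives roughly 162.
mu = 481.5106
mu * (1 - (phi1 + phi2 + phi3 + phi4))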
#e.
arima_400_coal = arima(bicoal,order = c(4,0,0),method="ML")
arima_400_coal
##
## Call:
## arima(x = bicoal, order = c(4, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 0.8334 -0.3444 0.5525 -0.3780 481.5106
## s.e. 0.1366 0.1752 0.1733 0.1414 21.0569
##
## sigma^2 estimated as 2509: log likelihood = -262.05, aic = 536.1
arima_400_coal %>%
forecast(h=12) %>%
autoplot()
#The forecast makes sense in the context of previous available data.
library(Quandl)
y <- Quandl("FRED/NROUST", api_key="duzxAd-Usc3TA4-vdQRx", type="ts")
#b.
autoplot(y)
unemp_arima = auto.arima(y)
unemp_arima
## Series: y
## ARIMA(3,2,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## 0.0930 0.3693 -0.3294 -0.3478 -0.3050 0.8710
## s.e. 0.0695 0.0651 0.0612 0.0375 0.0368 0.0281
##
## sigma^2 estimated as 9.266e-05: log likelihood=1066.1
## AIC=-2118.19 AICc=-2117.84 BIC=-2091.6
#auto.arima came up with an ARIMA(3,2,3) model
#c.
checkresiduals(unemp_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,2,3)
## Q* = 34.006, df = 3, p-value = 1.976e-07
##
## Model df: 6. Total lags used: 9
#The residuals are not white noise.
#d.
unemp_arima_fc = forecast(unemp_arima, h=48)
unemp_arima_fc %>%
autoplot()
#e.
unemp_ets = ets(y)
unemp_ets
## ETS(M,Ad,N)
##
## Call:
## ets(y = y)
##
## Smoothing parameters:
## alpha = 0.9002
## beta = 0.9002
## phi = 0.9117
##
## Initial states:
## l = 5.2505
## b = 0.009
##
## sigma: 0.002
##
## AIC AICc BIC
## -1089.744 -1089.485 -1066.913
#f.
checkresiduals(unemp_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 59.562, df = 3, p-value = 7.292e-13
##
## Model df: 5. Total lags used: 8
#The residuals are not white noise
#g.
unemp_ets %>%
forecast(h=48) %>%
autoplot()
#h.
#To me, the ARIMA model appears to be better in line with where the data is heading; its ACF and residuals also look more promising for a forecasting model