Exercises Chapter 8

Exercise 1

Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

pigs <- aus_livestock[aus_livestock$Animal=="Pigs",]
str(pigs)
## tbl_ts [4,464 x 4] (S3: tbl_ts/tbl_df/tbl/data.frame)
##  $ Month : mth [1:4464] 1972 jul., 1972 ago., 1972 sept., 1972 oct., 1972 nov., 1...
##  $ Animal: Factor w/ 7 levels "Bulls, bullocks and steers",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ State : Factor w/ 8 levels "Australian Capital Territory",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Count : num [1:4464] 1700 1500 1700 1500 2000 2000 1900 2200 2400 2200 ...
##  - attr(*, "key")= tibble [8 x 3] (S3: tbl_df/tbl/data.frame)
##   ..$ Animal: Factor w/ 7 levels "Bulls, bullocks and steers",..: 6 6 6 6 6 6 6 6
##   ..$ State : Factor w/ 8 levels "Australian Capital Territory",..: 1 2 3 4 5 6 7 8
##   ..$ .rows : list<int> [1:8] 
##   .. ..$ : int [1:558] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:558] 559 560 561 562 563 564 565 566 567 568 ...
##   .. ..$ : int [1:558] 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 ...
##   .. ..$ : int [1:558] 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 ...
##   .. ..$ : int [1:558] 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 ...
##   .. ..$ : int [1:558] 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 ...
##   .. ..$ : int [1:558] 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 ...
##   .. ..$ : int [1:558] 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
##  - attr(*, "index")= chr "Month"
##   ..- attr(*, "ordered")= logi TRUE
##  - attr(*, "index2")= chr "Month"
##  - attr(*, "interval")= interval [1:1] 1M
##   ..@ .regular: logi TRUE
head(pigs)
## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##        Month Animal State                        Count
##        <mth> <fct>  <fct>                        <dbl>
## 1  1972 jul. Pigs   Australian Capital Territory  1700
## 2  1972 ago. Pigs   Australian Capital Territory  1500
## 3 1972 sept. Pigs   Australian Capital Territory  1700
## 4  1972 oct. Pigs   Australian Capital Territory  1500
## 5  1972 nov. Pigs   Australian Capital Territory  2000
## 6  1972 dic. Pigs   Australian Capital Territory  2000
pigs<-ts(pigs$Count,start=c(1972,7),end=c(1996,4), frequency=12)
autoplot(pigs)

#### a. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.

library(forecast)
## Warning: package 'forecast' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method                 from     
##   autoplot.Arima         ggfortify
##   autoplot.acf           ggfortify
##   autoplot.ar            ggfortify
##   autoplot.bats          ggfortify
##   autoplot.decomposed.ts ggfortify
##   autoplot.ets           ggfortify
##   autoplot.forecast      ggfortify
##   autoplot.stl           ggfortify
##   autoplot.ts            ggfortify
##   fitted.ar              ggfortify
##   fortify.ts             ggfortify
##   residuals.ar           ggfortify
ses1 = ses(pigs, h=4)
ses1$model
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.3573 
## 
##   Initial states:
##     l = 1695.1154 
## 
##   sigma:  759.4864
## 
##      AIC     AICc      BIC 
## 5415.478 5415.563 5426.446

Here we see the optimal values of α and ℓ0.

forecast(ses1)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 1996       5217.003 4243.682 6190.324 3728.437 6705.569
## Jun 1996       5217.003 4183.419 6250.587 3636.273 6797.733
## Jul 1996       5217.003 4126.481 6307.525 3549.194 6884.812
## Aug 1996       5217.003 4072.372 6361.634 3466.441 6967.565

b. Compute a 95% prediction interval for the first forecast using ŷ ± 1.96s, where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

ses1$upper[1, "95%"]
##      95% 
## 6705.569
ses1$lower[1, "95%"]
##      95% 
## 3728.437
s <- sd(ses1$residuals)
ses1$mean[1] + 1.96*s
## [1] 6701.441
ses1$mean[1] - 1.96*s
## [1] 3732.565
autoplot(ses1) +
  autolayer(ses1$fitted)

The manually computed interval, 3732.565 to 6701.441, is very close to the interval produced by R (3728.437 to 6705.569).

Exercise 2

Write your own function to implement simple exponential smoothing. The function should take arguments y (the time series), alpha (the smoothing parameter), and level (the initial level ℓ0). It should return the forecast of the next observation in the series. Does it give the same forecast as ETS()?

SES <- function(y, alpha, l0){
  y_hat <- l0
  for(index in 1:length(y)){
   y_hat <- alpha*y[index] + (1 - alpha)*y_hat 
  }
  cat("Forecast of next observation by SES function: ",
      as.character(y_hat),
      sep = "\n")
}
alpha <- ses1$model$par[1]
l0 <- ses1$model$par[2]

SES(pigs, alpha = alpha, l0 = l0)
## Forecast of next observation by SES function: 
## 5217.00313129341
writeLines(paste(
  "Forecast of next observation by ses function: ",       as.character(ses1$mean[1])
  ))
## Forecast of next observation by ses function:  5217.00313129341

The function obtained the same result as the package function.

Exercise 3

Modify your function from the previous exercise to return the sum of squared errors rather than the forecast of the next observation. Then use the optim() function to find the optimal values of α and ℓ0. Do you get the same values as the ETS() function?

SES <- function(pars = c(alpha, l0), y){
  # we have to calculate differences.
  error <- 0
  SSE <- 0
  alpha <- pars[1]
  l0 <- pars[2]
  y_hat <- l0
  
  for(index in 1:length(y)){
    error <- y[index] - y_hat
    SSE <- SSE + error^2
    
    y_hat <- alpha*y[index] + (1 - alpha)*y_hat 
  }
  
  return(SSE)
}
opt_SES_pigs <- optim(par = c(0.5, pigs[1]), y = pigs, fn = SES)
writeLines(paste(
  "Optimal parameters for the result of SES function: ",
  "\n",
  as.character(opt_SES_pigs$par[1]),
  ", ",
  as.character(opt_SES_pigs$par[2]),
  sep = ""
  ))
## Optimal parameters for the result of SES function: 
## 0.357266289764084, 1700.86503303814
writeLines(paste(
  "Parameters got from the result of ses function: ",
  "\n",
  as.character(ses1$model$par[1]),
  ", ",
  as.character(ses1$model$par[2]),
  sep = ""
))
## Parameters got from the result of ses function: 
## 0.357299836689769, 1695.11538483805

No, the values are not exactly the same as those obtained with the ses()/ETS() optimisation; the α estimates agree to three decimal places, while the larger difference is in the ℓ0 parameter.
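As a quick sanity check, we can restart optim() from the ses() estimates and compare the SSE values; if the objective barely changes, the two parameter sets are practically equivalent. A minimal sketch reusing the SES() function defined above (output not shown):

opt_check <- optim(par = c(ses1$model$par[1], ses1$model$par[2]), y = pigs, fn = SES)
opt_check$par    # alpha and l0 found when starting from the ses() estimates
opt_check$value  # SSE at that optimum
SES(c(0.3573, 1695.115), y = pigs)  # SSE evaluated at the ses() estimates themselves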

Exercise 4

Combine your previous two functions to produce a function that both finds the optimal values of α and ℓ0, and produces a forecast of the next observation in the series.

SES <- function(init_pars, data){
  # make next observation forecast variable
  fc_next <- 0
  SSE <- function(pars, data){
    error <- 0
    SSE <- 0
    alpha <- pars[1]
    l0 <- pars[2]
    y_hat <- l0
    
    for(index in 1:length(data)){
      error <- data[index] - y_hat
      SSE <- SSE + error^2
      
      y_hat <- alpha*data[index] + (1 - alpha)*y_hat 
    }
    # use superassignment.
    fc_next <<- y_hat
    return(SSE)
  }
  
  # use optim function to get optimal values of alpha and l0.
  optim_pars <- optim(par = init_pars, data = data, fn = SSE)
  
  #results
  return(list(
    Next_observation_forecast = fc_next,
    alpha = optim_pars$par[1],
    l0 = optim_pars$par[2]
    ))
}
# compare the result using pigs data
SES(c(0.5, pigs[1]), pigs)
## $Next_observation_forecast
## [1] 5216.963
## 
## $alpha
## [1] 0.3572663
## 
## $l0
## [1] 1700.865
print("Next observation forecast by ses function")
## [1] "Next observation forecast by ses function"
ses1$mean[1]
## [1] 5217.003
print("alpha calculated by ses function")
## [1] "alpha calculated by ses function"
ses1$model$par[1]
##     alpha 
## 0.3572998
print("l0 calculated by ses function")
## [1] "l0 calculated by ses function"
ses1$model$par[2]
##        l 
## 1695.115

In terms of the next observation, the function gives essentially the same value as the package function.

Exercise 5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

global_economy %>%
  tsibble(key = Code, index = Year)%>%
  autoplot(Exports, show.legend =  FALSE) +
  labs(title= "Exports by country",
       y = "USD")
## Warning: Removed 4420 row(s) containing missing values (geom_path).

df <- global_economy %>% filter(Code =="BEL")

a. Plot the Exports series and discuss the main features of the data.

global_economy %>%
  tsibble(key = Code, index = Year)%>%
  filter(Country == "Belgium") %>%
  autoplot(Exports, show.legend =  FALSE) +
  labs(title= "Exports by country-Belgium",
       y = "USD")

b. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

fit <-df %>%
  model(ANN= ETS(Exports ~error("A")+trend("N")+season("N")))
fc <- fit  %>%forecast(h=4)
fc  %>% autoplot(df)+
  labs(title="Forecaste Belgium exports")

c. Compute the RMSE values for the training data.

fit %>% accuracy()
## # A tibble: 1 x 11
##   Country .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <fct>   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Belgium ANN    Training 0.877  3.23  2.52  1.35  4.30 0.987 0.989 -0.0642

The RMSE value is 3.227.

d. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

fit2 <-df %>%
  model(
    ANN= ETS(Exports ~error("A")+trend("N")+season("N")),
    AAN= ETS(Exports ~error("A")+trend("A")+season("N")))
accuracy(fit2)
## # A tibble: 2 x 11
##   Country .model .type          ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <fct>   <chr>  <chr>       <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Belgium ANN    Training 0.877     3.23  2.52  1.35   4.30 0.987 0.989 -0.0642
## 2 Belgium AAN    Training 0.000848  3.08  2.31 -0.232  3.97 0.905 0.943  0.0980

The AAN model reduces the training RMSE relative to the ANN model.

e. Compare the forecasts from both methods. Which do you think is best?

fit2 %>%
  forecast(h=4) %>%
  autoplot(df, level=NULL) +
  labs(title="Forecast comparison ANN vs AAN")

As we saw, the AAN model has the lower RMSE, and in the forecast plot the AAN model also seems to track the data better.

f. Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

cmp <- fit2 %>%
  dplyr::select(Country, AAN) %>%
  accuracy() %>%
  transmute(Country, s = RMSE)

fit2 %>%
  dplyr::select(Country, ANN) %>%
  forecast(h=1) %>%
  left_join(cmp, by="Country") %>%
  mutate(low  = .mean - 1.96*s,
         high = .mean + 1.96*s) %>%
  dplyr::select(Country, Exports, low, high)
## # A fable: 1 x 5 [?]
## # Key:     Country [1]
##   Country   Exports   low  high  Year
##   <fct>      <dist> <dbl> <dbl> <dbl>
## 1 Belgium N(85, 11)  78.8  90.9  2018
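For comparison, R's own 95% interval for each model can be extracted from the fable with hilo(); a minimal sketch (output not shown):

fit2 %>%
  forecast(h=1) %>%
  mutate(interval95 = hilo(Exports, 95)) %>%
  dplyr::select(Country, .model, .mean, interval95)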

Exercise 6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

china <-global_economy %>%
  filter(Country == "China")

china %>% autoplot(GDP) +
  labs(title = "GDP of China")

The GDP of China grows roughly exponentially over the series, so I will try a Box-Cox transformation.

lambda_china <- china %>%
                  features(GDP, features = guerrero) %>%
                  pull(lambda_guerrero)
china_tfs<-china %>% autoplot(box_cox(GDP,lambda_china))
china_tfs

The graph shows that the transformation was helpful, decreasing the variance.

fit <- china %>%
  model(
    ets=ETS(GDP),
    ets_damped =ETS(GDP ~ trend("Ad")),
    ets_bc =ETS(box_cox(GDP,lambda_china)),
    ets_log=ETS(log(GDP))
  )

fit %>%
  forecast(h="10 years") %>%
  autoplot(china, level=NULL) +
  labs(title="Forecast of China GDP with different models")

It seems that the models with transformations work better.

Exercise 7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production  %>% autoplot(Gas)

The graph shows that the seasonal variation increases with time, so it is to be expected that a Box-Cox transformation (or multiplicative seasonality) will be helpful in this case. Here is the result.

fit_gas <-aus_production  %>%
  filter(Quarter > yearquarter("1991 Q1"))%>%
  model(fit=ETS(Gas))
report(fit_gas)
## Series: Gas 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.465602 
##     beta  = 0.0005944637 
##     gamma = 0.0001109316 
## 
##   Initial states:
##      l[0]     b[0]      s[0]     s[-1]    s[-2]    s[-3]
##  134.5169 1.184594 -24.17026 -12.85238 26.25557 10.76707
## 
##   sigma^2:  28.9561
## 
##      AIC     AICc      BIC 
## 603.1913 605.8779 624.2856
fit_gas %>% 
  forecast(h="5 years")%>%
  autoplot(aus_production)

Here we forecast five years of gas production with an ETS model. Because the series was restricted to post-1991 data, where the seasonal variation is fairly stable, the automatically selected model is ETS(A,A,A), so multiplicative components are not used here; on the full series, where the seasonal swings grow with the level, multiplicative seasonality would be needed.
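On the full Gas series (without the post-1991 filter) ETS() is expected to pick multiplicative seasonality. A sketch comparing the automatic choice with explicit Holt-Winters multiplicative and damped-trend variants (output not shown):

aus_production %>%
  model(
    auto    = ETS(Gas),                                          # let ETS() choose
    hw_mult = ETS(Gas ~ error("M") + trend("A") + season("M")),  # Holt-Winters multiplicative
    damped  = ETS(Gas ~ error("M") + trend("Ad") + season("M"))  # damped multiplicative trend
  ) %>%
  glance() %>%
  dplyr::select(.model, AICc)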

Exercise 8

Recall your retail time series data (from Exercise 8 in Section 2.10).

a. Why is multiplicative seasonality necessary for this series?

retail <-aus_retail %>%
  filter(`Series ID` == "A3349337W")

retail %>% autoplot(Turnover)

In the graph we see that the seasonal variation increases over time, so a multiplicative seasonality model is the better approach; additive seasonality works best when the seasonal variation is roughly constant.

b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit <- retail %>%
  model(
    "Holt-Winters’ Damped" = ETS(Turnover ~ error("M")+trend("Ad")+season("M")),
    "Holt-Winters’ Multiplicative" = ETS(Turnover ~ error("M")+trend("A")+season("M"))
  )
forecast1 <- fit %>% forecast(h="10 years")
forecast1 %>%
  autoplot(retail,level=NULL) +
  labs(title="Retail in Australia",y="Turnover") +
  guides(colour =guide_legend(title= "Prediction"))

From the plot, the multiplicative (non-damped) model seems slightly better at forecasting the Turnover data.

c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit) %>% dplyr::select(".model","RMSE")
## # A tibble: 2 x 2
##   .model                        RMSE
##   <chr>                        <dbl>
## 1 Holt-Winters’ Damped          14.6
## 2 Holt-Winters’ Multiplicative  15.0

Considering the RMSE, the best model is the damped one, despite what the visual analysis above suggested.

d. Check that the residuals from the best method look like white noise.

fit %>% dplyr::select("Holt-Winters’ Damped") %>% gg_tsresiduals()

In the ACF plot we see several significant spikes, so the residuals are not necessarily white noise; we therefore also check the other model.

fit %>% dplyr::select("Holt-Winters’ Multiplicative") %>% gg_tsresiduals()

The residuals look better for the multiplicative model, although we saw that the damped model had the lower RMSE.

e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

train <- retail %>%
  filter(year(Month) < 2011)

fit_train <- train %>%
  model( 
    "Holt-Winters’ Damped" = ETS(Turnover ~ 
                                error("M")+trend("Ad")+season("M")),
    "Holt-Winters’ Multiplicative" = ETS(Turnover ~
                                error("M")+trend("A")+season("M")),
    "Seasonal Naive"=SNAIVE(Turnover)
    
  )

validation <- dplyr::anti_join(retail, train, by=c("State","Industry","Month","Turnover"))

forecast2 <- fit_train %>%
  forecast(validation)

autoplot(validation,Turnover)+autolayer(forecast2,level=NULL)+
  guides(colour=guide_legend(title="Prediction")) +
  ggtitle(" Comparation between methods")

From the comparison we can see that the Holt-Winters multiplicative method beats the seasonal naive approach, whereas the Holt-Winters damped method does not show a large improvement over the seasonal naive forecasts.
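The test-set RMSE itself (rather than only the plot) can be obtained with accuracy(); a minimal sketch (output not shown):

forecast2 %>%
  accuracy(retail) %>%
  dplyr::select(.model, .type, RMSE)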

Exercise 9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

lambda_retail <- retail %>%
                  features(Turnover, features = guerrero) %>%
                  pull(lambda_guerrero)
retail_trans<-train %>%
  mutate(bc=box_cox(Turnover,lambda_retail))

fit <- retail_trans%>%
  model(
    "STL (BoxCox)" = STL(bc ~ season(window="periodic"),
                         robust=T),
    "ETS (BoxCox)" = ETS(bc)
  )

fit2 <-  retail_trans%>%
    model(
      "Holt-Winters Multiplicative" =ETS(Turnover ~ error("M")+
                                                    trend("A")+
                                                   season("M"))
  )
      
rbind(accuracy(fit),accuracy(fit2))
## # A tibble: 3 x 12
##   State Industry .model .type     ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr> <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New ~ Hardwar~ STL (~ Trai~  0.382  4.73  3.08  0.0970  4.26 0.409 0.511 0.300
## 2 New ~ Hardwar~ ETS (~ Trai~  0.199  4.58  3.32  0.213   4.60 0.441 0.495 0.203
## 3 New ~ Hardwar~ Holt-~ Trai~ -1.14  13.5   9.55 -0.912   5.79 0.450 0.515 0.247

Based on the RMSE, the STL (Box-Cox) model looks best for this problem, although note that the first two models are evaluated on the Box-Cox scale, so their RMSE values are not directly comparable with the Holt-Winters one (see the sketch below).
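One way to make the comparison on the original scale is to put the Box-Cox transformation inside the model formula, so that fable back-transforms the forecasts automatically; a sketch of a test-set comparison (output not shown):

fit_bc <- train %>%
  model(
    "STL + ETS (BoxCox)" = decomposition_model(
      STL(box_cox(Turnover, lambda_retail) ~ season(window = "periodic"), robust = TRUE),
      ETS(season_adjust)
    )
  )
fit_bc %>%
  forecast(validation) %>%
  accuracy(retail)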

Exercise 10

Compute the total domestic overnight trips across Australia from the tourism dataset.

a. Plot the data and describe the main features of the series.

total_trips <- tourism %>%
  summarise(Trips=sum(Trips))

total_trips %>% autoplot(Trips)

b. Decompose the series using STL and obtain the seasonally adjusted data.

dec1 <- total_trips %>%
  model(STL(Trips)) %>% components()

dec1 %>%
  as_tsibble()%>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = Trips`

The decomposition splits the series into trend, seasonal and remainder components, from which the seasonally adjusted data are obtained.

c. Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be specified using decomposition_model().)

total_trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A")+trend("Ad")+season("N")))
  ) %>%
  forecast(h="2 years") %>%
  autoplot(total_trips)

The model seems to be a good predictor.

d. Forecast the next two years of the series using an appropriate model for Holt’s linear method applied to the seasonally adjusted data (as before but without damped trend).

total_trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A")+trend("A")+season("N")))
  ) %>%
  forecast(h="2 years") %>%
  autoplot(total_trips)

e. Now use ETS() to choose a seasonal model for the data.

total_trips %>%
  model(ETS(Trips)
  ) %>%
  forecast(h="2 years") %>%
  autoplot(total_trips)

f. Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?

fit <- total_trips %>%
  model(
    STL_AAdN = decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A")+trend("Ad")+season("N"))),
    STL_AAN = decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A")+trend("A")+season("N"))),
    ETS = ETS(Trips)
  )

accuracy(fit)
## # A tibble: 3 x 10
##   .model   .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 STL_AAdN Training 103.   763.  576. 0.367  2.72 0.607 0.629 -0.0174 
## 2 STL_AAN  Training  99.7  763.  574. 0.359  2.71 0.604 0.628 -0.0182 
## 3 ETS      Training 105.   794.  604. 0.379  2.86 0.636 0.653 -0.00151

g. Compare the forecasts from the three approaches? Which seems most reasonable?

fit %>%
  forecast(h="2 years") %>%
  autoplot(total_trips,level=NULL)+
  guides(colour=guide_legend(title="Prediction"))

All three models seem to perform well; their forecasts are very similar.

h. Check the residuals of your preferred model.

model <- fit %>%
  dplyr::select(STL_AAN)

model %>%
  gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

The residuals resemble white noise, so the model fits the data well.
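A Ljung-Box test on the innovation residuals can back up the visual impression; a minimal sketch (the choice of lag = 8, two seasonal periods, is an assumption):

model %>%
  augment() %>%
  features(.innov, ljung_box, lag = 8)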

Exercise 11

For this exercise use the quarterly number of arrivals to Australia from New Zealand, 1981 Q1 – 2012 Q3, from data set aus_arrivals.

a. Make a time plot of your data and describe the main features of the series.

visits <- aus_arrivals %>% filter(Origin=='NZ')
autoplot(visits,Arrivals)+
  ggtitle("Total Arrivals")

b. Create a training set that withholds the last two years of available data. Forecast the test set using an appropriate model for Holt-Winters’ multiplicative method.

train<-visits %>%
  slice(1:(nrow(visits)-(4*2)))

train %>%
  model(
    ETS(Arrivals ~ error("M")+trend("A")+season("M"))
    
  ) %>%
  forecast(h="2 years")%>%
  autoplot(level=NULL)+
  autolayer(visits,Arrivals)+
  labs("Train for the data")

c. Why is multiplicative seasonality necessary here?

Because the size of the seasonal components is increasing in proportion to the trend.
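This can also be checked numerically by fitting an additive and a multiplicative seasonal ETS model to the training data and comparing their AICc values; a minimal sketch (output not shown):

train %>%
  model(
    additive       = ETS(Arrivals ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Arrivals ~ error("M") + trend("A") + season("M"))
  ) %>%
  glance() %>%
  dplyr::select(.model, AICc)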

d. Forecast the two-year test set using each of the following methods:

an ETS model;

an additive ETS model applied to a log transformed series;

a seasonal naïve method;

an STL decomposition applied to the log transformed data, followed by an ETS model applied to the seasonally adjusted (transformed) data.

c<-anti_join(visits,train,by=c("Quarter","Origin","Arrivals"))

fit <- train %>%
  model(
    'ETS model' = ETS(Arrivals),
    'Additive Log' = ETS(log(Arrivals)~ error("A")+trend("A")+season("A")),
    'Naive model'=SNAIVE(Arrivals),
    'STL decomposition to log transformed data' = decomposition_model(STL(log(Arrivals)),
                                                                ETS(season_adjust))
  )

forecast3 <- fit  %>% forecast(h="2 years")
forecast3 %>% autoplot(level=NULL)+autolayer(c,Arrivals)+
  guides(colour=guide_legend(title="Prediction"))+
  labs(title="Visitors forecasted data compared with real data")

e. Which method gives the best forecasts? Does it pass the residual tests?

forecast3 %>% accuracy(visits)
## # A tibble: 4 x 11
##   .model     Origin .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Additive ~ NZ     Test   -7093. 17765. 12851. -2.17   4.20 0.864 0.918  0.0352
## 2 ETS model  NZ     Test   -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 3 Naive mod~ NZ     Test    9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239 
## 4 STL decom~ NZ     Test  -12535. 22723. 16172. -4.02   5.23 1.09  1.17   0.109

Based on the RMSE, the best model is the ETS model.
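The question also asks whether the best model passes the residual tests; a sketch of how to check this for the ETS model (output not shown):

fit %>%
  dplyr::select("ETS model") %>%
  gg_tsresiduals()

fit %>%
  dplyr::select("ETS model") %>%
  augment() %>%
  features(.innov, ljung_box, lag = 8)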

f. Compare the same four methods using time series cross-validation instead of using a training and test set. Do you come to the same conclusions?

cv <- visits %>%
  slice(1:(n()-3)) %>%
  stretch_tsibble(.init =36,.step=3)

cv %>% 
  model(
    'ETS model' = ETS(Arrivals),
    'Additive Log' = ETS(log(Arrivals)~ error("A")+trend("A")+season("A")),
    'Naive model'=SNAIVE(Arrivals),
    'STL decomposition to log transformed data' = decomposition_model(STL(log(Arrivals)),
                                                                ETS(season_adjust))
  ) %>% forecast(h=3)%>% 
  accuracy(visits)
## # A tibble: 4 x 11
##   .model          Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive Log    NZ     Test  -375. 14347. 11021. 0.200  5.83 0.741 0.746 0.309
## 2 ETS model       NZ     Test  4627. 15327. 11799. 2.23   6.45 0.793 0.797 0.283
## 3 Naive model     NZ     Test  8244. 18768. 14422. 3.83   7.76 0.970 0.976 0.566
## 4 STL decomposit~ NZ     Test  4252. 15618. 11873. 2.04   6.25 0.798 0.812 0.244

With cross-validation the additive ETS model on the log-transformed series has the lowest RMSE, so the conclusion changes slightly from the train/test split, where the plain ETS model came out best.

Exercise 12.

a. Apply cross-validation techniques to produce 1 year ahead ETS and seasonal naïve forecasts for Portland cement production (from aus_production). Use a stretching data window with initial size of 5 years, and increment the window by one observation.

portland_cement_crossval <- aus_production %>%
  slice(1:(n()-4)) %>%
  stretch_tsibble(.init = 20, .step = 1)
port_cement_crossval_f <- portland_cement_crossval %>%
  model(ETS(Cement),
        SNAIVE(Cement)
        ) %>%
  forecast(h = "1 year")

b. Compute the MSE of the resulting 4-step-ahead errors. Comment on which forecasts are more accurate. Is this what you expected?

port_cement_crossval_f %>% accuracy(aus_production)
## # A tibble: 2 x 10
##   .model         .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Cement)    Test  -0.795  112.  80.4 -0.397  5.29 0.797 0.845 0.628
## 2 SNAIVE(Cement) Test  30.3    139. 107.   1.91   6.97 1.06  1.05  0.602

The ETS forecasts are more accurate than the seasonal naïve forecasts (RMSE 112 vs 139), which is what we would expect for a series with both trend and seasonality.

Exercise 13.

Compare ETS(), SNAIVE() and decomposition_model(STL, ???) on the following five time series. You might need to use a Box-Cox transformation for the STL decomposition forecasts. Use a test set of three years to decide what gives the best forecasts.

a. Beer and bricks production from aus_production.

aus_production %>% autoplot(Beer) +
  labs(title = "Australian Beer Production") 

beer <- aus_production %>%
  slice(1:(nrow(aus_production)-12)) %>%
  model(
    ETS_beer = ETS(Beer),
    SNAIVE_beer = SNAIVE(Beer),
    STL_Decomp_beer = decomposition_model(STL(log(Beer)),
                                          ETS(season_adjust))
  ) %>%  forecast(h = "3 years")

beer %>%
  autoplot(filter_index(aus_production, "2000 Q1" ~ .), level = NULL) +
  guides(colour=guide_legend(title="Prediction")) +
  labs(title = "Australian Beer Production")
beer %>% accuracy(aus_production)
## # A tibble: 3 x 10
##   .model          .type     ME  RMSE   MAE      MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr>  <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS_beer        Test   0.127  9.62  8.92  0.00998  2.13 0.563 0.489 0.376
## 2 SNAIVE_beer     Test  -2.92  10.8   9.75 -0.651    2.34 0.616 0.549 0.325
## 3 STL_Decomp_beer Test  -2.85   9.87  8.95 -0.719    2.16 0.565 0.502 0.283

For this dataset the ETS model has the lowest RMSE and therefore the best performance.

bricks <- aus_production %>% filter(!is.na(Bricks)) 

bricks %>% autoplot(Bricks) + 
  labs(title = "Australian Quarterly Brick Production") 

A Box-Cox transformation is not applied here because the variance is not increasing over time.

bricks_models <- bricks %>% slice(1:(nrow(bricks)-12)) %>%
  model(
    ETS = ETS(Bricks),
    SNAIVE = SNAIVE(Bricks),
    STL_Decomp = decomposition_model(STL(log(Bricks)),
                                     ETS(season_adjust))
  ) %>%
forecast(h = "3 years")

bricks_models %>%
  autoplot(filter_index(bricks, "1980 Q2" ~ .), level = NULL) +
  labs(title = "Australian Quarterly Brick Production")

bricks_models %>% accuracy(bricks)
## # A tibble: 3 x 10
##   .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS        Test   2.27  17.5  13.2 0.474  3.31 0.365 0.354  0.339 
## 2 SNAIVE     Test  32.6   36.5  32.6 7.85   7.85 0.898 0.739 -0.322 
## 3 STL_Decomp Test   9.71  18.7  14.9 2.29   3.65 0.411 0.378  0.0933

The ETS model has the best performance for this data.

b. Cost of drug subsidies for diabetes (ATC2 == “A10”) and corticosteroids (ATC2 == “H02”) from PBS.

s <- PBS %>%
  filter(ATC2 %in% c("A10","H02"))%>%
  group_by(ATC2)%>%
  summarise(Total=sum(Cost))

s %>%
  autoplot(Total)+
  facet_grid(vars(ATC2),scales="free_y")+
  ggtitle("Total cost for diabetes and corticosteroids")

The series for A10 seems to have increasing variance, so we will apply a Box-Cox transformation.

c <- s %>%
  filter(ATC2 %in% 'H02')

cortico_cost <- c %>%
  features(Total, features = guerrero) %>%
  pull(lambda_guerrero)

cortico_models <-c %>% filter(Month < max(Month) - 35) %>%
  model(
    ETS = ETS(Total),
    SNAIVE = SNAIVE(Total),
    STL_Decomp = decomposition_model(STL(box_cox(log(Total), cortico_cost)), ETS(season_adjust))
  ) %>%
  forecast(h = "3 years")

d <- s %>%
  filter(ATC2 %in% 'A10')

diabetes_cost <- d %>%
  features(Total, features = guerrero) %>%
  pull(lambda_guerrero)

diabetes_models <- d %>%
  filter(Month < max(Month) - 35) %>%
  model(
    ETS = ETS(Total),
    SNAIVE = SNAIVE(Total),
    STL_Decomp = decomposition_model(STL(box_cox(log(Total), diabetes_cost)),
                                     ETS(season_adjust))
  ) %>%
  forecast(h = "3 years")

comparation <- bind_rows(cortico_models, diabetes_models)

comparation %>%
  autoplot(s, level = NULL) +
  labs(title = "Diabetes Vs Corticosteroids Forecasts")

For A10, where the variance is increasing, the STL model fits the forecast period better, because it was built on the transformed (Box-Cox) data.

comparation %>%
  accuracy(s)%>%
  arrange(ATC2)
## # A tibble: 6 x 11
##   .model     ATC2  .type       ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr> <chr>    <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS        A10   Test  1382519. 2.36e6 1.85e6  5.83   8.74 1.89  2.00   0.177 
## 2 SNAIVE     A10   Test  4318158. 5.18e6 4.33e6 19.8   19.9  4.42  4.40   0.638 
## 3 STL_Decomp A10   Test    94089. 1.57e6 1.29e6 -0.269  6.63 1.32  1.33  -0.153 
## 4 ETS        H02   Test    27005. 7.65e4 6.45e4  1.99   7.05 1.09  1.07  -0.0990
## 5 SNAIVE     H02   Test   -14795. 8.55e4 7.16e4 -1.31   7.88 1.21  1.20   0.0226
## 6 STL_Decomp H02   Test    22681. 6.83e4 5.63e4  1.66   6.24 0.952 0.960 -0.219

For both A10 and H02 the best model is the STL decomposition on the transformed data, as it has the lowest test RMSE in each case.

c. Total food retailing turnover for Australia from aus_retail.

food <- aus_retail %>%
  filter(Industry=="Food retailing") %>%
  summarise(Turnover=sum(Turnover))

food %>%
  autoplot(Turnover) +
  ggtitle("Total food retailing")

food_lambda <- food %>% features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

food_models <- food %>%
  filter(Month < max(Month) - 35) %>%
  model(ETS = ETS(Turnover),
        SNAIVE = SNAIVE(Turnover),
        STL_Decomp = decomposition_model(STL(box_cox(log(Turnover),
                                                     food_lambda)),
                                         ETS(season_adjust))
        ) %>%
  forecast(h = "3 years")
food_models %>% autoplot(filter_index(food, "2005 Jan" ~ .), level = NULL) +
  ggtitle("Australian food turnover")


food_models %>% accuracy(food)
## # A tibble: 3 x 10
##   .model     .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS        Test  -151.    194.  170. -1.47   1.65 0.639 0.634 0.109
## 2 SNAIVE     Test   625.    699.  625.  5.86   5.86 2.35  2.29  0.736
## 3 STL_Decomp Test    -8.34  155.  132. -0.135  1.24 0.496 0.506 0.256

For this data set the best model is the STL decomposition with the Box-Cox transformation. This is because the variance of the seasonal component increases over time, and a Box-Cox transformation works well in such cases.

Exercise 14.

a. Use ETS() to select an appropriate model for the following series: total number of trips across Australia using tourism, the closing prices for the four stocks in gafa_stock, and the lynx series in pelt. Does it always give good forecasts?

trips<-tourism %>%
  summarise(Trips=sum(Trips))

fit <- trips %>%
  model(ETS(Trips))

report(fit)
## Series: Trips 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.4495675 
##     beta  = 0.04450178 
##     gamma = 0.0001000075 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]
##  21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
## 
##   sigma^2:  699901.4
## 
##      AIC     AICc      BIC 
## 1436.829 1439.400 1458.267
fit %>%
  forecast()%>%
  autoplot(trips)+
  ggtitle("Prediction of total trips")

From the visual analysis we can see that the ETS model fits the data well for this problem.

Gafa stocks

gafa_stock%>%
  autoplot(Close)+
  facet_grid(vars(Symbol),scales="free_y")

We see that the closing prices of the four stocks behave similarly as time series.

gafa2 <- gafa_stock %>%
  group_by(Symbol) %>%
  mutate(trading_day = row_number()) %>%
  ungroup() %>%
  as_tsibble(index = trading_day, regular = TRUE)

gafa_fc <- fit <- gafa2 %>%
  model(
    ETS(Close)
  )

report(fit)
## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 4 x 10
##   Symbol .model       sigma2 log_lik    AIC   AICc    BIC    MSE   AMSE    MAE
##   <chr>  <chr>         <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 AAPL   ETS(Close) 0.000228  -5299. 10604. 10604. 10620.   4.39   8.96 0.0106
## 2 AMZN   ETS(Close) 0.000383  -7778. 15561. 15561. 15577. 359.   700.   0.0129
## 3 FB     ETS(Close) 0.000356  -5442. 10890. 10890. 10906.   5.82  11.3  0.0126
## 4 GOOG   ETS(Close) 0.000219  -7529. 15064. 15064. 15079. 144.   291.   0.0102
gafa_fc %>%
  forecast(h=50) %>%
  autoplot(gafa2 %>% group_by_key() %>% slice((n() - 100):n())) +
  labs(title = "Prediction Closing Stock Prices")
## `mutate_if()` ignored the following grouping variables:
## * Column `Symbol`

#### pelt data

pelt %>%
  model(ETS(Lynx))
## # A mable: 1 x 1
##    `ETS(Lynx)`
##        <model>
## 1 <ETS(A,N,N)>
pelt %>%
  model(ETS(Lynx)) %>% forecast(h = 10) %>%
  autoplot(pelt) +
  ggtitle("Prediction Pelt trades")

b.Find an example where it does not work well. Can you figure out why?

Looking at the pelt forecasts, we see that ETS does not work well for cyclic data: the selected ETS(A,N,N) model simply projects the last level forward and ignores the cycles.

Exercise 15.

Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.

pelt.etsF<-pelt %>%
  model(ETS(Lynx)) %>% forecast(h = 1)
pelt.etsF
## # A fable: 1 x 4 [1Y]
## # Key:     .model [1]
##   .model     Year              Lynx  .mean
##   <chr>     <dbl>            <dist>  <dbl>
## 1 ETS(Lynx)  1936 N(35399, 1.6e+08) 35399.
pelt2 <- ts(pelt$Lynx, start = c(1825),end=c(1934), frequency = 12)
hw <- HoltWinters(pelt2)
forecast <- predict(hw, n.ahead = 11, prediction.interval = T, level = 0.95)
forecast[5]
## [1] 38945.14

We can see that the two values are of a similar magnitude, although this is only a rough check.
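A more direct check of the claim can be made on a clearly seasonal series; a sketch using quarterly Gas from aus_production (the choice of series is an assumption, and because the two functions estimate their parameters differently the point forecasts should be close rather than identical):

gas_ts <- ts(aus_production$Gas, start = c(1956, 1), frequency = 4)

ets_mam_fc <- aus_production %>%
  model(ETS(Gas ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = 1)
hw_fc <- predict(HoltWinters(gas_ts, seasonal = "multiplicative"), n.ahead = 1)

ets_mam_fc$.mean  # ETS(M,A,M) one-step point forecast
hw_fc             # Holt-Winters' multiplicative one-step point forecast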

Exercise 16.

Show that the forecast variance for an ETS(A,N,N) model is given by sigma_h^2 = sigma^2 * (1 + alpha^2 * (h - 1)).

a <- forecast::ets(pigs, model = "ANN")
b <- forecast(a,1)
b
##          Point Forecast    Lo 80   Hi 80    Lo 95    Hi 95
## May 1996       5216.989 4243.668 6190.31 3728.423 6705.555
a
## ETS(A,N,N) 
## 
## Call:
##  forecast::ets(y = pigs, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.3573 
## 
##   Initial states:
##     l = 1692.08 
## 
##   sigma:  759.4864
## 
##      AIC     AICc      BIC 
## 5415.478 5415.563 5426.446
pigs_sigma <- 10308.58
alpha <- 0.2976
h <- 2
conf_int <- pigs_sigma*(1+alpha*(h-1))
b$mean[1]
## [1] 5216.989
b$mean[1] - conf_int
## [1] -8159.424
b$lower[1, '95%']
##      95% 
## 3728.423
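The chunk above does not quite apply the formula (the α and σ values there are not the fitted ones, and they are not squared). A corrected numerical check against the intervals from forecast() could look like this, assuming the ets object exposes par and sigma2 (output not shown):

alpha_hat <- a$par["alpha"]
sigma_hat <- sqrt(a$sigma2)
h <- 1:4
manual_sd <- sigma_hat * sqrt(1 + alpha_hat^2 * (h - 1))     # sigma_h from the formula above

fc <- forecast(a, h = 4)
cbind(manual_lo95 = as.numeric(fc$mean) - 1.96 * manual_sd,  # manual lower 95% bound
      R_lo95      = as.numeric(fc$lower[, "95%"]))           # lower 95% bound from forecast()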

Exercises Chapter 9

Exercise 1

Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

a. Explain the differences among these figures. Do they all indicate that the data are white noise?

All three charts indicate that the data are white noise, because the ACF spikes all lie within the significance bounds, the dashed lines marking the critical values beyond which an autocorrelation would be considered statistically significant.

For the series with fewer observations the ACF spikes are taller than for the series with more observations. This is because the more values are randomly generated, the more precisely the sample autocorrelations are estimated, so for genuinely uncorrelated data they sit closer to zero.

b. Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

These bounds are computed as ±1.96/sqrt(N), centred at zero. Mathematically, as N grows the critical value shrinks in absolute value. Statistically, this means it is easier for a small data set to show apparently significant autocorrelation by chance than for a large one.
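A quick numerical check of how the bounds shrink with N (a minimal sketch):

n <- c(36, 360, 1000)
data.frame(n = n, critical_value = 1.96 / sqrt(n))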

Exercise 2.

A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

library(fma)
## Warning: package 'fma' was built under R version 4.0.5
## 
## Attaching package: 'fma'
## The following objects are masked _by_ '.GlobalEnv':
## 
##     beer, pigs
## The following objects are masked from 'package:MASS':
## 
##     cement, housing, petrol
ggtsdisplay(ibmclose)+ggtitle("IBM Close Prices +ACF")

Looking at the plot we can see that there is (or at least appears to be) a trend, so the data are not stationary.

In terms of the ACF and PACF plots, for stationary data we expect the ACF to drop to zero quickly (low correlation at large lags); here it decays very slowly. In the PACF only the first lag has a large value (similar to the first ACF lag), which again suggests the data are non-stationary and should be differenced.
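The same diagnostics can be produced for the Amazon closing prices that the exercise actually asks about, reusing the trading-day re-indexing used for gafa_stock earlier; a sketch (output not shown):

amzn <- gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  mutate(trading_day = row_number()) %>%
  as_tsibble(index = trading_day, regular = TRUE)

amzn %>% gg_tsdisplay(Close, plot_type = "partial")  # time plot, ACF and PACF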

Exercise 3

For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

a. Turkish GDP from global_economy.

turkish <- global_economy %>%
  filter(Country == "Turkey")

turkish<-ts(turkish$GDP,start=c(1960),end=c(2017))
ggtsdisplay(turkish)

The ACF plot suggests a difference of order 1, and the series appears to have an exponential trend, so we will also look at the result of a Box-Cox transformation.

lambda_turk <- global_economy %>%
                    filter(Country == "Turkey") %>%
                    features(GDP, features = guerrero) %>%
                    pull(lambda_guerrero)
global_economy %>%
  filter(Country == "Turkey") %>%
  autoplot(box_cox(GDP, lambda_turk)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed GDP with $\\lambda$ = ",
         round(lambda_turk,2))))+
  theme_replace()+
  geom_line(col = "#FFC300")

The transformation seems to stabilise the variance. Differencing gives the following result.

turk.s <- diff(turkish, 1)
ggtsdisplay(turk.s)

Differencing removes the trend from the series; the KPSS unit root test on the differenced data is as follows.

library(urca)
## Warning: package 'urca' was built under R version 4.0.5
turk.s %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.3856 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

The test statistic is smaller than the 10% critical value, so the null hypothesis that the differenced data are stationary cannot be rejected.
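Within the fpp3 framework, the number of first differences needed can also be confirmed with the unit-root features; a minimal sketch (output not shown):

global_economy %>%
  filter(Country == "Turkey") %>%
  mutate(bc_GDP = box_cox(GDP, lambda_turk)) %>%
  features(bc_GDP, unitroot_ndiffs)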

b. Accommodation takings in the state of Tasmania from aus_accommodation.

library(forecast)
Tasmania <- aus_accommodation %>%
  filter(State == "Tasmania") %>%
  mutate(Takings=Takings)

tasmania<-ts(Tasmania$Takings,start=c(1998),end=c(2015))
ggtsdisplay(tasmania)

From the plots above, the series appears to be dominated by seasonality rather than trend; the ACF and PACF suggest a seasonal difference of order 4.

lambda_t <- aus_accommodation %>%
                    filter(State == "Tasmania") %>%
                    features(Takings, features = guerrero) %>%
                    pull(lambda_guerrero)
aus_accommodation %>%
  filter(State == "Tasmania") %>%
  autoplot(box_cox(Takings, lambda_t)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed Takings with $\\lambda$ = ",
         round(lambda_t,2))))+
  theme_replace()+
  geom_line(col = "#FFC300")

The seasonality visible in the plot can be dealt with in the next step, the differencing.

tas.s <- diff(tasmania, 4)
ggtsdisplay(tas.s)

library(urca)
tas.s %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 2 lags. 
## 
## Value of test-statistic is: 0.139 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

Again the test statistic is smaller than the 10% critical value, so the null hypothesis that the differenced data are stationary cannot be rejected.

c. Monthly sales from souvenirs

souvenirs2<- ts(souvenirs$Sales,start=c(1987,1), end=c(1993,6), frequency=12)

ggtsdisplay(souvenirs2)

lambda_s <- souvenirs %>%
                    features(Sales, features = guerrero) %>%
                    pull(lambda_guerrero)
souvenirs %>%
  autoplot(box_cox(Sales, lambda_s)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed Sales with $\\lambda$ = ",
         round(lambda_s,2))))+
  theme_replace()+
  geom_line(col = "#FFC300")

souvenirs3<-(box_cox(souvenirs$Sales, lambda_s))

sou.s <- diff(souvenirs3, 12)  # seasonal difference at lag 12 (monthly data)
ggtsdisplay(sou.s)

Exercise 4

For the souvenirs data, write down the differences you chose above using backshift operator notation.

a<-auto.arima(souvenirs3)
summary(a)
## Series: souvenirs3 
## ARIMA(2,1,0) 
## 
## Coefficients:
##           ar1      ar2
##       -0.4434  -0.4252
## s.e.   0.1007   0.0997
## 
## sigma^2 = 0.3101:  log likelihood = -68.41
## AIC=142.83   AICc=143.13   BIC=150.08
## 
## Training set error measures:
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.08184992 0.5467881 0.3866701 0.6412044 4.116318 0.9401246
##                     ACF1
## Training set -0.05871123

Since we are taking a seasonal difference of the transformed data, in backshift notation the differenced series is w(t) = (1 - B^12) y(t), where y(t) denotes the Box-Cox transformed sales.

Exercise 5

For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myts <- ts(myseries$Turnover, frequency=12, start=c(1982,4))
autoplot(myts)

ggtsdisplay(myts)

The data have seasonality and trend, and the seasonal variance increases over time.

bc.lambda <- BoxCox.lambda(myts)
myts.s <- myts %>% BoxCox(bc.lambda) %>% diff(1) %>% diff(12)
ggtsdisplay(myts.s)

We see that with a Box-Cox transformation, a first difference and a seasonal difference of lag 12 we can remove the trend and seasonality from the data.
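The unit-root features give the same kind of answer for the seasonal and non-seasonal differencing orders; a minimal sketch (output not shown):

myseries %>%
  mutate(bc_turnover = box_cox(Turnover, bc.lambda)) %>%
  features(bc_turnover, unitroot_nsdiffs)

myseries %>%
  mutate(bc_turnover = box_cox(Turnover, bc.lambda)) %>%
  features(bc_turnover, unitroot_ndiffs)  # usually assessed after taking the seasonal difference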

Exercise 6

Simulate and plot some data from simple ARIMA models.

a. Use the following R code to generate data from an AR(1) model with ϕ1 = 0.6 and σ2 = 1. The process starts with y1 = 0.

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
## Plot variable not specified, automatically selected `.vars = y`

b. Produce a time plot for the series. How does the plot change as you change ϕ1?

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.06*y[i-1] + e[i]
sim2 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.006*y[i-1] + e[i]
sim3 <- tsibble(idx = seq_len(100), y = y, index = idx)

data<-dplyr::bind_cols(sim,sim2,sim3)
## New names:
## * idx -> idx...1
## * y -> y...2
## * idx -> idx...3
## * y -> y...4
## * idx -> idx...5
## * ...
data<-data[,-c(3,5)]

autoplot(data)
## Plot variable not specified, automatically selected `.vars = y...2`

It seems that with smaller ϕ1 the data look more random: the value generated at time i depends more on the error term at i than on the value at i-1. The autocorrelations are higher, and decay more slowly, for larger ϕ1.
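The effect on the autocorrelation can be seen directly by plotting the ACF of each simulated series; a minimal sketch (output not shown):

sim  %>% ACF(y) %>% autoplot() + ggtitle("AR(1) with phi = 0.6")
sim2 %>% ACF(y) %>% autoplot() + ggtitle("AR(1) with phi = 0.06")
sim3 %>% ACF(y) %>% autoplot() + ggtitle("AR(1) with phi = 0.006")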

c. Write your own code to generate data from an MA(1) model with θ1 = 0.6 and σ2 = 1.

ma_generator <- function(theta, standardd, n, eo){
  y <- ts(numeric(n))
  e <- rnorm(n, sd=standardd)
  e[1] <- eo
  for(i in 2:n)
    y[i] <- theta*e[i-1] + e[i]
  return(y)
}

test<-ma_generator(0.6,1,100,0)

d. Produce a time plot for the series. How does the plot change as you change θ1?

data.list <- list()
i <- 0
theta.vec <- c(0.006, 0.06, 0.6)
for (theta in theta.vec){
  i <- i + 1
  data.list[[i]] <- ma_generator(theta,1,100,0)
}
data <- do.call(cbind, data.list)
colnames(data) <- paste('theta=', theta.vec, sep = '')
autoplot(data) + ylab('Data')

A smaller θ1 in the MA(1) model means that each new value depends less on the previous error term, so the series looks more random.

e. Generate data from an ARMA(1,1) model with ϕ1 = 0.6, θ1 = 0.6 and σ2 = 1.

y1 <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y1[i] <- 0.6*y1[i-1] + e[i] + 0.6*e[i-1]
sim <- tsibble(idx = seq_len(100), y = y1, index = idx)
autoplot(sim) + ggtitle("ARMA(1,1)")
## Plot variable not specified, automatically selected `.vars = y`

f. Generate data from an AR(2) model with ϕ1 = -0.8, ϕ2 = 0.3 and σ2 = 1.

y2 <- ts(numeric(100))
e <- rnorm(100, sd=1)
for(i in 3:100)
  y2[i] <- -0.8*y2[i-1] + 0.3*y2[i-2] + e[i]
autoplot(y2) +
  ggtitle('AR(2)')

g. Graph the latter two series and compare them.

The plots were already produced above. The AR(2) data are clearly non-stationary: the amplitude of the oscillations grows exponentially over time, which could be mistaken for growing seasonal variance.

The ARMA(1,1) data do not show this apparent seasonality; they look random and much more stationary than the AR(2) data.

Exercise 7.

Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

a. Use ARIMA() to find an appropriate ARIMA model. What model was selected? Check that the residuals look like white noise. Plot forecasts for the next 10 periods.

fit <- aus_airpassengers %>%
  model(
    search = ARIMA(Passengers, stepwise = FALSE)
  )

report(fit)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

The model selected was ARIMA(0,2,1).

fit %>%
  gg_tsresiduals()

The residuals seem to be white noise.

fit %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers)

The model fits the data well, so it is working well for this problem.

b. Write the model in terms of the backshift operator.

For the selected ARIMA(0,2,1) model this is (1 - B)^2 y(t) = (1 + theta1 B) e(t), with theta1 = -0.8963 (the ma1 coefficient reported above).

c. Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.

fit2 <- aus_airpassengers %>%
  model(
    arima010 = ARIMA(Passengers ~ pdq(0,1,0))
  )

fit2 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers)

The ARIMA(0,1,0) model with drift takes less of the past dynamics into account: it projects the series forward at its average historical growth rate, so its forecasts rise more slowly than those from the ARIMA(0,2,1) model in part a.

d. Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.

fit2 <- aus_airpassengers %>%
  model(
    arima212 = ARIMA(Passengers ~ pdq(2,1,2))
  )
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning: 1 error encountered for arima212
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots
fit2 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 row(s) containing missing values (geom_path).

The R output shows that this fully specified model could not be estimated: the automatic check flags characteristic roots that may be numerically unstable, so we are facing a stability problem with this specification.

fit2 <- aus_airpassengers %>%
  model(
    arima021 = ARIMA(Passengers ~ pdq(0,2,1))
  )

fit2 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers)

This is the same model that the automatic ARIMA() search selected in part a, so it is the best fit to the data.

Exercise 8.

For the United States GDP series (from global_economy)

a. If necessary, find a suitable Box-Cox transformation for the data.

us_gdp <- global_economy %>%
  filter(Code == 'USA') %>%
  dplyr::select(Country, GDP)

autoplot(us_gdp)
## Plot variable not specified, automatically selected `.vars = GDP`

A Box-Cox transformation seems worthwhile for this series, given its roughly exponential growth.

lambda_us <- us_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

us_gdp <- us_gdp %>%
  mutate(GDP = box_cox(GDP, lambda_us))

b. Fit a suitable ARIMA model to the transformed data using ARIMA().

fit <- us_gdp %>%
  model(
    arima = ARIMA(GDP, stepwise = FALSE, approx = FALSE)
  )
report(fit)
## Series: GDP 
## Model: ARIMA(1,1,0) w/ drift 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.1822
## s.e.  0.1198    9.5047
## 
## sigma^2 estimated as 5479:  log likelihood=-325.32
## AIC=656.65   AICc=657.1   BIC=662.78

c. Try some other plausible models by experimenting with the orders chosen.

fit2 <- us_gdp %>%
  model(
    arima = ARIMA(GDP ~ pdq(2,2,1))
    )

report(fit2)
## Series: GDP 
## Model: ARIMA(2,2,1) 
## 
## Coefficients:
##          ar1      ar2      ma1
##       0.3079  -0.0953  -0.7979
## s.e.  0.1850   0.1568   0.1437
## 
## sigma^2 estimated as 5834:  log likelihood=-321.06
## AIC=650.12   AICc=650.91   BIC=658.23
fit3 <- us_gdp %>%
  model(
    arima = ARIMA(GDP ~ pdq(0,1,1))
    )

report(fit3)
## Series: GDP 
## Model: ARIMA(0,1,1) w/ drift 
## 
## Coefficients:
##          ma1  constant
##       0.4026  219.6195
## s.e.  0.1098   13.6953
## 
## sigma^2 estimated as 5689:  log likelihood=-326.37
## AIC=658.73   AICc=659.18   BIC=664.86
fit4 <- us_gdp %>%
  model(
    arima = ARIMA(GDP ~ pdq(2,1,0))
    )

report(fit4)
## Series: GDP 
## Model: ARIMA(2,1,0) w/ drift 
## 
## Coefficients:
##          ar1     ar2  constant
##       0.4554  0.0071  117.2907
## s.e.  0.1341  0.1352    9.5225
## 
## sigma^2 estimated as 5580:  log likelihood=-325.32
## AIC=658.64   AICc=659.41   BIC=666.82

d. Choose what you think is the best model and check the residual diagnostics.

The ARIMA(1,1,0) with drift model is kept. Note that AICc values are only comparable between models with the same order of differencing; among the d = 1 models it has the lowest AICc (657.1, versus 659.18 and 659.41).

fit %>%
  gg_tsresiduals()

The residuals seem to be white noise.

e. Produce forecasts of your fitted model. Do the forecasts look reasonable?

fit %>%
  forecast(h = 10) %>%
  autoplot(us_gdp)

Yes, for me the forecast is reasonable.

f. Compare the results with what you would obtain using ETS() (with no transformation).

global_economy %>%
  filter(Code == 'USA') %>%
  dplyr::select(Country, GDP) %>%
  model(
    ETS(GDP)
  ) %>%
  forecast(h = 10) %>%
  autoplot(global_economy)

The ETS forecast shows much wider prediction intervals, so without the transformation we face more uncertainty in the forecasts.

Exercise 9

Consider aus_arrivals, the quarterly number of international visitors to Australia from several countries for the period 1981 Q1 – 2012 Q3

a. Select one country and describe the time plot.

aus_uk <- aus_arrivals %>% filter(Origin=='UK')
aus_uk %>% autoplot(.vars=Arrivals) + 
  ggtitle("Australian Arrivals from uk")

b. Use differencing to obtain stationary data

aus_uk3<-ts(aus_uk$Arrivals,start =c(1981,1),frequency = 4)
aus_lambda <- BoxCox.lambda(aus_uk3)
aus_uk2 <- aus_uk3 %>% BoxCox(aus_lambda) %>% diff(1) %>% diff(4)  # seasonal difference at lag 4 (quarterly data)
ggtsdisplay(aus_uk2)

c. What can you learn from the ACF graph of the differenced data?

The ACF graph of the differenced data lets us check whether the chosen differencing (a first difference plus a seasonal lag-4 difference for this quarterly series) has removed the trend and seasonality; any remaining significant spikes point to structure that an ARMA component will need to capture.

d. What can you learn from the PACF graph of the differenced data?

The PACF of the differenced data helps identify how many autoregressive (and seasonal autoregressive) terms might be needed; here, together with the ACF, it does not point to a strong AR component, which is consistent with the automatic model below. A Box-Cox transformation was also used because the variance of the data increases over time.

e. What model do these graphs suggest?

Something like an ARIMA(0,1,1)(0,1,1)[4] model.

f. Does ARIMA() give the same model that you chose? If not, which model do you think is better?

aus_uk4 <- aus_uk3 %>% BoxCox(aus_lambda)
auto.arima(aus_uk4)
## Series: aus_uk4 
## ARIMA(0,1,1)(0,1,1)[4] 
## 
## Coefficients:
##           ma1     sma1
##       -0.5149  -0.7691
## s.e.   0.0803   0.0814
## 
## sigma^2 = 0.002822:  log likelihood = 184.03
## AIC=-362.07   AICc=-361.86   BIC=-353.66

Yes, this matches the model suggested by the graphs; auto.arima() selects it because it minimises the AICc.

Exercise 10

Choose a series from us_employment, the total employment in different industries in the United States

a. Produce an STL decomposition of the data and describe the trend and seasonality.

us_employment %>% filter(Title == "Durable Goods") %>%
  model(STL(Employed)) %>%
  components() %>% autoplot()

b. Do the data need transforming? If so, find a suitable transformation.

The data do not show increasing variance, so I do not think a transformation is needed.

c. Are the data stationary? If not, find an appropriate differencing which yields stationary data.

We will apply the statistic test.

us_employment %>% filter(Title == "Durable Goods") %>%
  features(Employed, unitroot_kpss)
## # A tibble: 1 x 3
##   Series_ID     kpss_stat kpss_pvalue
##   <chr>             <dbl>       <dbl>
## 1 CEU3100000001      2.23        0.01

As the p-value is 0.01 (below 0.05) we reject the null hypothesis of stationarity, so the data are not stationary and need differencing.
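An appropriate differencing can then be found with the unit-root features; a minimal sketch (output not shown):

us_employment %>%
  filter(Title == "Durable Goods") %>%
  features(Employed, unitroot_nsdiffs)

us_employment %>%
  filter(Title == "Durable Goods") %>%
  mutate(d12_Employed = difference(Employed, 12)) %>%
  features(d12_Employed, unitroot_ndiffs)  # first differences needed after the seasonal difference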

d. Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AICc values?

fit <- us_employment %>%
  filter(Title == "Durable Goods") %>%
  model(arima111 = ARIMA(Employed ~ pdq(1,1,1)),
        arima210 = ARIMA(Employed ~ pdq(2,1,0)))
report (fit)
## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 2 x 9
##   Series_ID     .model   sigma2 log_lik    AIC   AICc    BIC ar_roots  ma_roots
##   <chr>         <chr>     <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <list>    <list>  
## 1 CEU3100000001 arima111 12134.  -5924. 11857. 11857. 11882. <cpl [1]> <cpl>
## 2 CEU3100000001 arima210 12468.  -5937. 11883. 11883. 11908. <cpl [2]> <cpl>

The best model (lowest AICc) is the ARIMA(1,1,1).

e. Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.

fit2 <- us_employment %>%
  filter(Title == "Durable Goods") %>%
  model(ARIMA(Employed))
report(fit2)
## Series: Employed 
## Model: ARIMA(1,1,3)(2,0,0)[12] 
## 
## Coefficients:
##          ar1      ma1      ma2     ma3    sar1    sar2
##       0.7320  -0.5125  -0.0293  0.1658  0.2170  0.1912
## s.e.  0.0486   0.0549   0.0371  0.0433  0.0316  0.0317
## 
## sigma^2 estimated as 11665:  log likelihood=-5903.93
## AIC=11821.85   AICc=11821.97   BIC=11855.98

The best model for this series is the one that minimises the AICc, here ARIMA(1,1,3)(2,0,0)[12]; the seasonal part confirms that the data have a seasonal component.

fit2 %>% gg_tsresiduals()

The residuals resemble white noise.
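
A Ljung-Box test makes the visual check more formal; with the 6 estimated coefficients of the ARIMA(1,1,3)(2,0,0)[12] model, dof = 6 is the usual adjustment (a sketch using fit2 from above):

augment(fit2) %>%
  features(.innov, ljung_box, lag = 24, dof = 6)   # .innov holds the innovation residuals

A large p-value would confirm that the residuals are indistinguishable from white noise.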

f. Forecast the next 3 years of data. Get the latest figures from https://fred.stlouisfed.org/categories/11 to check the accuracy of your forecasts.

fit3<- us_employment %>%
  filter(Title == "Durable Goods") %>%
  model(ARIMA(Employed))

fit3 %>%
  forecast(h = "3 years") %>%
  autoplot(us_employment, level = NULL) + ggtitle("Durable Goods Forecast")

Compared with the actual figures, the forecasts are reasonably accurate, although there is room for improvement.
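
To put a number on that impression, the newer observations from FRED would need to be read into a tsibble and compared with the forecasts via accuracy(). The sketch below assumes a hypothetical tsibble called latest_employment, with the same Month index, Series_ID key and Employed column as us_employment; it is not part of the original data.

# latest_employment: assumed tsibble built from the CSV downloaded at fred.stlouisfed.org
fit3 %>%
  forecast(h = "3 years") %>%
  accuracy(latest_employment)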

g. Eventually, the prediction intervals are so wide that the forecasts are not particularly useful. How many years of forecasts do you think are sufficiently accurate to be usable?

fc <- fit3 %>%
  forecast(h = "3 years") %>%
  mutate(PI = hilo(Employed, level = 95))
fc
## # A fable: 36 x 6 [1M]
## # Key:     Series_ID, .model [1]
##    Series_ID     .model       Month        Employed .mean                     PI
##    <chr>         <chr>        <mth>          <dist> <dbl>                 <hilo>
##  1 CEU3100000001 ARIMA(E~ 2019 oct.  N(8061, 11665) 8061. [7849.744, 8273.108]95
##  2 CEU3100000001 ARIMA(E~ 2019 nov.  N(8065, 29010) 8065. [7731.212, 8398.870]95
##  3 CEU3100000001 ARIMA(E~ 2019 dic.  N(8068, 50294) 8068. [7628.559, 8507.656]95
##  4 CEU3100000001 ARIMA(E~ 2020 ene.  N(8050, 80633) 8050. [7493.864, 8606.963]95
##  5 CEU3100000001 ARIMA(E~ 2020 feb. N(8055, 118614) 8055. [7379.686, 8729.722]95
##  6 CEU3100000001 ARIMA(E~ 2020 mar. N(8059, 162732) 8059. [7268.159, 8849.461]95
##  7 CEU3100000001 ARIMA(E~ 2020 abr. N(8059, 211634) 8059. [7157.110, 8960.423]95
##  8 CEU3100000001 ARIMA(E~ 2020 may. N(8065, 264194) 8065. [7057.787, 9072.623]95
##  9 CEU3100000001 ARIMA(E~ 2020 jun. N(8095, 319515) 8095. [6986.658, 9202.422]95
## 10 CEU3100000001 ARIMA(E~ 2020 jul. N(8086, 376901) 8086. [6883.184, 9289.718]95
## # ... with 26 more rows
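
The printed intervals already hint at the answer: the 95% interval is roughly 400 wide in the first month but about 2,400 wide ten months out, against a level of around 8,000, which suggests that forecasts much beyond the first year are too uncertain to be very useful. A sketch for tracking how quickly the intervals widen (it assumes the $lower/$upper field accessors for hilo columns; unpack_hilo() is an alternative):

fit3 %>%
  forecast(h = "3 years") %>%
  mutate(PI = hilo(Employed, level = 95)) %>%
  as_tibble() %>%
  mutate(width = PI$upper - PI$lower,          # width of the 95% interval
         relative_width = width / .mean) %>%   # width relative to the point forecast
  select(Month, .mean, width, relative_width)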

Exercise 11

Choose one of the following seasonal time series: the Australian production of electricity, cement, or gas (from aus_production).

aus_production %>% autoplot(Electricity) + ggtitle("Australian Electricity Production")

a. Do the data need transforming? If so, find a suitable transformation.

Yes, a transformation is worthwhile because the variance of the series increases over time; a log transformation is applied below.

aus_elec <- aus_production %>% dplyr::select("Quarter", "Electricity")
aus_elec$Electricity <- log(aus_elec$Electricity)
aus_elec %>% autoplot(.vars=Electricity) +
  labs(title = "Australian Electricity Production with Transformation") 

b. Are the data stationary? If not, find an appropriate differencing which yields stationary data.

aus_elec %>%
  features(Electricity, unitroot_kpss)
## # A tibble: 1 x 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1      4.26        0.01

The series is not stationary: the KPSS test rejects the null hypothesis of stationarity (p-value = 0.01), so differencing is needed.
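
Since the data are quarterly, a seasonal difference at lag 4 followed by a first difference should be enough; a sketch that applies both and re-runs the KPSS test:

aus_elec %>%
  mutate(diff_elec = difference(difference(Electricity, 4))) %>%   # seasonal then first difference
  features(diff_elec, unitroot_kpss)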

c. Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?

elec_fit <- aus_elec %>%
  model(arima310 = ARIMA(Electricity ~ pdq(3,1,0)),
        arima210 = ARIMA(Electricity ~ pdq(2,1,0)))
report(elec_fit)
## Warning in report.mdl_df(elec_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 x 8
##   .model     sigma2 log_lik    AIC   AICc    BIC ar_roots   ma_roots 
##   <chr>       <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <list>     <list>   
## 1 arima310 0.000357    545. -1076. -1076. -1053. <cpl [11]> <cpl [4]>
## 2 arima210 0.000350    546. -1080. -1080. -1060. <cpl [6]>  <cpl [8]>

According to the AIC, the ARIMA(2,1,0) specification is actually slightly better (AIC = -1080 versus -1076 for the ARIMA(3,1,0)).

d. Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.

fit2 <- aus_elec %>%
  model(ARIMA(Electricity))
report(fit2)
## Series: Electricity 
## Model: ARIMA(1,1,1)(0,1,1)[4] 
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.4537  -0.8002  -0.6075
## s.e.  0.1320   0.0969   0.0527
## 
## sigma^2 estimated as 0.0003586:  log likelihood=543.29
## AIC=-1078.59   AICc=-1078.4   BIC=-1065.14
fit2 %>% gg_tsresiduals()

Yes, the residuals resemble white noise.

e. Forecast the next 24 months of data using your preferred model.

elec_arima <- aus_elec %>%
  model(ARIMA(Electricity))

elec_arima %>%
  forecast(h = "2 years") %>%
  autoplot(aus_elec, level = NULL)

Visually, the forecasts look reasonable.

f. Compare the forecasts obtained using ETS().

ets <- aus_elec %>% model(ETS(Electricity))
ets %>% forecast(h=24) %>%
  autoplot(aus_elec) + ggtitle("Forecasted Australian Electricity Production using ETS")
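
The plots alone make it hard to say which approach is preferable. A sketch of one way to compare them, holding out the last two years of observations and comparing test-set accuracy (the name train is an assumption):

# Hold out the final 8 quarters as a test set (aus_elec is quarterly).
train <- aus_elec %>% slice(1:(nrow(aus_elec) - 8))
fits <- train %>%
  model(arima = ARIMA(Electricity),
        ets   = ETS(Electricity))
fits %>%
  forecast(h = 8) %>%
  accuracy(aus_elec)   # lower RMSE/MAE on the test set indicates the better model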

Exercise 12

aus_elec_ts <- as.ts(aus_elec)   # stl() and stlf() expect a ts object, not a tsibble
aus_decomp <- stl(aus_elec_ts, t.window = 12, s.window = "periodic")

aus_decomp2 <- seasadj(aus_decomp)   # seasonally adjusted series from the STL fit

# stlf() decomposes the series with STL, forecasts the seasonally adjusted
# component with an ARIMA model, and reseasonalises the forecasts.
aus_elec_fc <- stlf(aus_elec_ts, method = "arima")
plot(aus_elec_fc)

The result is good: the STL decomposition removes the seasonality before the ARIMA model is fitted to the seasonally adjusted series, which accomplishes much the same thing as fitting a seasonal ARIMA model directly.
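
For completeness, the same idea can be expressed in the fable framework with decomposition_model(), which fits an ARIMA model to the seasonally adjusted component produced by STL (a sketch, assuming aus_elec from Exercise 11; the seasonal component is forecast with SNAIVE by default):

aus_elec %>%
  model(stl_arima = decomposition_model(
    STL(Electricity, robust = TRUE),
    ARIMA(season_adjust)
  )) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_elec)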

Exercise 13

For the Australian tourism data (from tourism):

a. Fit ARIMA models for each time series.

fit <- tourism %>% model(ARIMA(Trips))
## Warning in sqrt(diag(best$var.coef)): NaNs produced
report(fit)
## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 304 x 11
##    Region         State Purpose .model sigma2 log_lik   AIC  AICc   BIC ar_roots
##    <chr>          <chr> <chr>   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>  
##  1 Adelaide       Sout~ Busine~ ARIMA~ 1.19e3   -395.  799.  799.  808. <cpl>   
##  2 Adelaide       Sout~ Holiday ARIMA~ 5.38e2   -365.  738.  738.  747. <cpl>   
##  3 Adelaide       Sout~ Other   ARIMA~ 1.86e2   -318.  642.  643.  650. <cpl>   
##  4 Adelaide       Sout~ Visiti~ ARIMA~ 8.79e2   -384.  776.  776.  785. <cpl>   
##  5 Adelaide Hills Sout~ Busine~ ARIMA~ 1.85e1   -230.  463.  463.  468. <cpl>   
##  6 Adelaide Hills Sout~ Holiday ARIMA~ 3.80e1   -256.  516.  516.  521. <cpl>   
##  7 Adelaide Hills Sout~ Other   ARIMA~ 2.11e0   -141.  293.  293.  304. <cpl>   
##  8 Adelaide Hills Sout~ Visiti~ ARIMA~ 1.09e2   -298.  599.  599.  604. <cpl>   
##  9 Alice Springs  Nort~ Busine~ ARIMA~ 4.29e1   -260.  527.  527.  534. <cpl>   
## 10 Alice Springs  Nort~ Holiday ARIMA~ 1.01e2   -285.  575.  576.  582. <cpl>   
## # ... with 294 more rows, and 1 more variable: ma_roots <list>

b. Produce forecasts of your fitted models.

fit %>% forecast(h=12) %>%
  autoplot(tourism) 

c. Check the forecasts for the “Snowy Mountains” and “Melbourne” regions. Do they look reasonable?

fit %>% forecast(h=12) %>%
  filter(Region=="Snowy Mountains") %>%
  autoplot(tourism) 

fit %>% forecast(h=12) %>%
  filter(Region=="Melbourne") %>%
  autoplot(tourism) 

Exercise 14

For your retail time series (Exercise 5 above):

a. Develop an appropriate seasonal ARIMA model.

turnover <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1)) 
fit3 <- turnover %>%
  model(
    arima1 = ARIMA(Turnover ~ pdq(1,1,2) + PDQ(0,1,1)),
    arima2 = ARIMA(Turnover ~ pdq(2,1,0) + PDQ(0,1,1)),
    auto = ARIMA(Turnover, stepwise = FALSE, approx = FALSE)
  )
fit3
## # A mable: 1 x 5
## # Key:     State, Industry [1]
##   State           Industry                      arima1                    arima2
##   <chr>           <chr>                        <model>                   <model>
## 1 South Australia Cafes, re~ <ARIMA(1,1,2)(0,1,1)[12]> <ARIMA(2,1,0)(0,1,1)[12]>
## # ... with 1 more variable: auto <model>
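
To finish the selection, the three candidates can be compared on AICc and the chosen specification reported (a sketch using the fit3 mable above):

glance(fit3) %>%
  select(.model, AICc) %>%
  arrange(AICc)

# report the automatically selected model
fit3 %>% select(auto) %>% report()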