## Import the following libraries..
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.2     ✔ fabletools  0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

Problem 9.1. Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers, and 1,000 random numbers

A.

The main difference between the three figures is that the critical-value bounds get narrower and the sample autocorrelations get smaller as the number of random numbers increases. None of the plots has spikes crossing the bounds, indicating that all three series are white noise.

B.

The critical values are at different distances from zero because they are computed as ±1.96/√T: the more random numbers we sample, the narrower the bounds become. The autocorrelations themselves differ between the figures because each plot is based on a different random sample; since the data are white noise, any spikes are purely due to chance and so appear in different positions in each plot.
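For reference, the dashed bounds in these ACF plots are drawn at

\[ \pm \frac{1.96}{\sqrt{T}} \]

so for T = 36, 360 and 1,000 they sit at roughly ±0.33, ±0.10 and ±0.06 respectively, which is exactly the narrowing we see across the three figures.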


9.2 A Classic Example

gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  autoplot(Close) + labs(title = "Close Prices Of Amazon Stocks")

The data display a strong increasing trend and no fixed seasonal pattern, indicating that the series is non-stationary.


## First we re-index the data
amzn_stock <- gafa_stock |>
  filter(Symbol == "AMZN") |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE) %>%
  select(Close)
amzn_stock %>%
  gg_tsdisplay(plot_type = "partial")
## Plot variable not specified, automatically selected `y = Close`

According to the plots of the Amazon closing prices, the series has a clear increasing trend, which tells us the data are non-stationary. The ACF shows significant spikes at every lag, decaying very slowly, while the PACF has one large spike at the first lag. Because of this, we should difference the series (first-order, and second-order if needed) so that it becomes stationary and the residuals look like white noise.
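As a quick check of that reasoning (a sketch, not part of the original output), we can display the first difference of the closing price directly:

amzn_stock %>%
  gg_tsdisplay(difference(Close), plot_type = "partial")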


9.3 For the following series, find an appropriate Box-Cox transformation and order of differencing to obtain stationary data

A. Turkish GDP

Turkey_GDP <- global_economy %>%
  filter(Country == "Turkey")

  
Turkey_GDP %>%
  autoplot(GDP) + labs(title = "Turkey GDP")

lambda <- Turkey_GDP |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)
## The lambda value looks similar to a log transformation
Turkey_GDP %>%
  autoplot(box_cox(GDP,lambda)) + labs(title = "GDP Transformed (Box-Cox)")


Applying The Differences

Turkey_GDP %>%
  ACF(box_cox(GDP,lambda)) %>%
  autoplot() + labs(title = "AMZN Closing Stock Price (logged)")

Even after the transformation, the ACF still shows significant spikes at every lag, decaying slowly, so the trend remains and the series needs differencing.

## Applying The Difference 
Turkey_GDP %>%
  gg_tsdisplay(difference(box_cox(GDP,lambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Applying the difference once makes the Turkish GDP series look like white noise.


Portmanteau Test

## Use a lag value of 10 
Turkey_GDP |>
  mutate(diff_GDP = difference(box_cox(GDP,lambda))) |>
  features(diff_GDP, ljung_box, lag = 10)
## # A tibble: 1 × 3
##   Country lb_stat lb_pvalue
##   <fct>     <dbl>     <dbl>
## 1 Turkey     5.84     0.829

Since the p-value (0.829) is well above 0.05, we cannot reject the null hypothesis that the differenced series is white noise, so one difference appears sufficient to make the series stationary.
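As a cross-check (a sketch; output not shown), the unitroot_ndiffs feature estimates how many first differences are needed:

Turkey_GDP |>
  features(box_cox(GDP, lambda), unitroot_ndiffs)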


B. Tasmanian Accommodation

Tasmania <- aus_accommodation %>%
  filter(State == "Tasmania")

Tasmania %>%
  autoplot(Takings) + labs(title = "Tasmania Takings")

The time series shows an increasing trend, increasing variance, and strong seasonality.

Tlambda <- Tasmania |>
  features(Takings, features = guerrero) |>
  pull(lambda_guerrero)
## The lambda value again looks similar to a log transformation
Tasmania %>%
  autoplot(box_cox(Takings,Tlambda)) + labs(title = "Takings Transformed (Box-Cox)")

The variance now looks stable across the whole series; the Box-Cox transformation is again close to a log transformation.


Tasmania %>%
  ACF(box_cox(Takings,Tlambda)) %>%
  autoplot() + labs(title = "Tasmania Takings (logged)")

There are significant spikes at every lag, with a clear seasonal pattern in the ACF.


Applying The Difference

We apply a seasonal difference. (Note that aus_accommodation is quarterly, so the natural seasonal lag is 4; the lag of 12 used below corresponds to a three-year difference.)
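Whether a seasonal difference is needed at all can be confirmed with the unitroot_nsdiffs feature, which uses the series' own seasonal period (a sketch; output not shown):

Tasmania |>
  features(box_cox(Takings, Tlambda), unitroot_nsdiffs)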

Tasmania %>%
        gg_tsdisplay(difference(box_cox(Takings,Tlambda),12),plot_type = "partial")
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).

The ACF still shows several significant spikes, and the PACF has a large spike at the second lag, so the seasonally differenced series does not yet look like white noise.


Portmanteau Test

## Use a lag value of 10 
Tasmania |>
  mutate(diff_Tak = difference(box_cox(Takings,Tlambda),12)) |>
  features(diff_Tak, ljung_box, lag = 10)
## # A tibble: 1 × 3
##   State    lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 Tasmania    234.         0

The p-value is essentially zero (far below 0.05), which means the seasonally differenced series is still not white noise; we apply an additional first-order difference.


First Order Difference

Tasmania %>%
        gg_tsdisplay(difference(difference(box_cox(Takings,Tlambda),12),1),plot_type = "partial")
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).


Portmanteau Test

## Use a lag value of 10 
Tasmania |>
  mutate(diff_Tak = difference(difference(box_cox(Takings,Tlambda),12),1)) |>
  features(diff_Tak, ljung_box, lag = 10)
## # A tibble: 1 × 3
##   State    lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 Tasmania    10.4     0.409

After applying the seasonal difference followed by a first-order difference, the p-value (0.409) is no longer significant, so the doubly differenced series looks like white noise.

C. Monthly Souvenir Sales

souvenirs
## # A tsibble: 84 x 2 [1M]
##       Month Sales
##       <mth> <dbl>
##  1 1987 Jan 1665.
##  2 1987 Feb 2398.
##  3 1987 Mar 2841.
##  4 1987 Apr 3547.
##  5 1987 May 3753.
##  6 1987 Jun 3715.
##  7 1987 Jul 4350.
##  8 1987 Aug 3566.
##  9 1987 Sep 5022.
## 10 1987 Oct 6423.
## # ℹ 74 more rows
souvenirs %>%
  autoplot(Sales) + labs(title ="Souvenirs Sales")

Slambda <- souvenirs |>
  features(Sales, features = guerrero) |>
  pull(lambda_guerrero)
souvenirs %>%
  autoplot(box_cox(Sales,Slambda)) + labs(title ="Souvenirs Sales (Box_Cox)")

Applying the Box-Cox transformation has stabilised the variance, making the trend and seasonal pattern more uniform across the series.

souvenirs %>%
  ACF(box_cox(Sales,Slambda)) %>%
  autoplot() + labs(title = "Souvenirs Sales (logged)")

There appear to be significant spikes at many lags in the ACF plot before differencing.

Applying The Differences

souvenirs %>%
        gg_tsdisplay(difference(box_cox(Sales,Slambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Portmanteau Test

## Use a lag value of 10 
souvenirs |>
  mutate(diff_Sales = difference(box_cox(Sales,Slambda))) |>
  features(diff_Sales, ljung_box, lag = 10)
## # A tibble: 1 × 2
##   lb_stat lb_pvalue
##     <dbl>     <dbl>
## 1    30.1  0.000828

The p-value of 0.0008 is significant, which tells us the first-differenced series is still not white noise. Given the strong seasonality, we apply a seasonal difference followed by a first difference.


Seasonal and First-Order Difference

souvenirs %>%
        gg_tsdisplay(difference(difference(box_cox(Sales,Slambda),12),1),plot_type = "partial")
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).

Portmanteau Test (After Both Differences)

souvenirs |>
  mutate(diff_Sales = difference(difference(box_cox(Sales,Slambda),12),1)) |>
  features(diff_Sales, ljung_box, lag = 10)
## # A tibble: 1 × 2
##   lb_stat lb_pvalue
##     <dbl>     <dbl>
## 1    22.7    0.0120

The Ljung-Box statistic has come down and the p-value has gone up, but it is still significant at the 5% level, so some autocorrelation remains even after both differences. Note, though, that differencing aims for stationarity rather than white noise; the unit root test below addresses stationarity directly.

Unit Root Test

souvenirs |>
  features(box_cox(Sales,Slambda),unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1      1.79        0.01
souvenirs |>
  features(difference(box_cox(Sales,Slambda)),unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0631         0.1

Looking at the KPSS test, once we apply a single difference the p-value is 0.1, which according to the textbook means we cannot reject stationarity: the differenced data appear stationary. The portmanteau test can still reject white noise because stationarity and white noise are different properties: a stationary series may retain autocorrelation (any stationary ARMA process is autocorrelated but not white noise). Differencing targets stationarity; an ARIMA model then captures the remaining autocorrelation.


9.5. For your retail data, find the appropriate order of differencing (after transformation if necessary) to obtain stationary data

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
  autoplot(Turnover) + labs(title = "Retail Data")

The time series has an increasing trend and strong seasonality.

Saleslambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

Applying The Lambda value

myseries %>%
  autoplot(box_cox(Turnover,Saleslambda)) + labs(title = "Box_Cox Sales")

Applying the transformation has stabilised the variance across the series.

myseries %>%
  gg_tsdisplay(box_cox(Turnover,Saleslambda),plot_type = "partial")

The ACF has significant spikes at every lag, and the PACF shows several significant spikes as well.


Apply the difference

## The lag parameter displays lags up to 36
myseries %>%
  gg_tsdisplay(difference(box_cox(Turnover,Saleslambda),12),plot_type = "partial",lag =36)
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).

The ACF is decaying slowly towards zero and there are significant spikes at every 12th lag in the PACF, so the data are clearly still non-stationary. We take a further first difference.

First Order Difference

myseries %>%
  gg_tsdisplay(difference(box_cox(Turnover,Saleslambda),12) |> difference(),plot_type = "partial",lag = 36)
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).


Unit-Root Test..

myseries |> 
  features(box_cox(Turnover,Saleslambda),unitroot_ndiffs)
## # A tibble: 1 × 3
##   State              Industry                                            ndiffs
##   <chr>              <chr>                                                <int>
## 1 Northern Territory Clothing, footwear and personal accessory retailing      1
myseries |>
  mutate(diff_close = difference(box_cox(Turnover,Saleslambda),12) |> difference()) |>
  features(diff_close, unitroot_kpss)
## # A tibble: 1 × 4
##   State              Industry                              kpss_stat kpss_pvalue
##   <chr>              <chr>                                     <dbl>       <dbl>
## 1 Northern Territory Clothing, footwear and personal acce…    0.0165         0.1

According to the KPSS unit root test, after applying the second (first-order) difference the p-value is 0.1, so we cannot reject stationarity: the doubly differenced data appear stationary. The unitroot_ndiffs feature above also confirms that one first difference is needed.
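To mirror the earlier sections, a portmanteau test on the doubly differenced series would look like the sketch below (output not shown); with monthly data a lag of 24 is a common choice:

myseries |>
  mutate(diff_turn = difference(box_cox(Turnover, Saleslambda), 12) |> difference()) |>
  features(diff_turn, ljung_box, lag = 24)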


9.6. Use the following R code to generate data from an AR(1) model with phi1 = 0.6 and sigma^2 = 1

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)

A. Simulate and Plot

ari <- function(phi, sigmasq){
  set.seed(10)
  y <- numeric(100)
  ## rnorm()'s second argument is the mean, so the standard
  ## deviation must be passed explicitly as sqrt(sigmasq)
  e <- rnorm(100, sd = sqrt(sigmasq))
  ## AR(1): y_t = phi * y_{t-1} + e_t
  for(i in 2:100)
    y[i] <- phi*y[i-1] + e[i]
  tsibble(idx = seq_len(100), y = y, index = idx)
}
phi <- 0.6
sigma2 <- 1

ari(0.6,1) %>%
  autoplot() + labs(title = "Time Series with phi = 0.6")
## Plot variable not specified, automatically selected `.vars = y`

B. Produce A Time-Plot

ari(0.7,1) %>%
  autoplot() + labs(title = "Time Series with phi = 0.7")
## Plot variable not specified, automatically selected `.vars = y`

ari(0.9,1) %>%
  autoplot() + labs(title = "Time Series with phi = 0.9")
## Plot variable not specified, automatically selected `.vars = y`

ari(0.3,1) %>%
  autoplot() + labs(title = "Time Series with phi = 0.3")
## Plot variable not specified, automatically selected `.vars = y`

ari(0.2,1) %>%
  autoplot() + labs(title = "Time Series with phi = 0.2")
## Plot variable not specified, automatically selected `.vars = y`

As phi increases, the series becomes smoother and more persistent, wandering further from zero; as phi decreases, the series looks more like white noise, with sharper, more rapid oscillations around zero.
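This matches the theory: a stationary AR(1) process has variance

\[ \text{Var}(y_t) = \frac{\sigma^2}{1-\phi_1^2} \]

so as phi approaches 1 the series wanders further from zero, while small phi keeps it close to white noise.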

C. Write your own code to generate data from an MA(1) model with theta1 = 0.6 and sigma^2 = 1.

mov_avg <- function(theta, sigmasq){
  set.seed(15)
  y <- numeric(100)
  ## Pass the standard deviation explicitly (rnorm's second argument is the mean)
  e <- rnorm(100, sd = sqrt(sigmasq))
  ## MA(1): y_t = e_t + theta * e_{t-1}
  for (i in 2:100)
    y[i] <- e[i] + theta * e[i-1]
  tsibble(idx = seq_len(100), y = y, index = idx)
}
mov_avg(0.6,1) %>%
  autoplot() + labs(title = "Moving Average Time Plot with Theta = 0.6")
## Plot variable not specified, automatically selected `.vars = y`

D. Produce a time-plot

mov_avg(0.9,1) %>%
  autoplot() + labs(title = "Moving Average with theta = 0.9")
## Plot variable not specified, automatically selected `.vars = y`

mov_avg(0.1,1) %>%
  autoplot() + labs(title = "Moving Average with theta = 0.1")
## Plot variable not specified, automatically selected `.vars = y`

It's difficult to tell, but with lower values of theta the series is closer to white noise, with sharp peaks and drops; as theta increases, consecutive values become more correlated and the peaks are less pronounced.
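This also matches the theory: an MA(1) process has lag-1 autocorrelation

\[ \rho_1 = \frac{\theta_1}{1+\theta_1^2} \]

which is about 0.10 for theta = 0.1 (nearly white noise) and about 0.50 for theta = 0.9 (noticeably smoother).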

E. Generate Data from an ARMA(1,1) model with phi1 = 0.6, theta1 = 0.6 and sigma^2 = 1

arma <- function(theta, phi, sigmasq){
  set.seed(10)
  y <- numeric(100)
  ## Pass the standard deviation explicitly (rnorm's second argument is the mean)
  e <- rnorm(100, sd = sqrt(sigmasq))
  ## ARMA(1,1): y_t = phi * y_{t-1} + theta * e_{t-1} + e_t
  for (i in 2:100)
    y[i] <- e[i] + theta * e[i-1] + phi * y[i-1]
  tsibble(idx = seq_len(100), y = y, index = idx)
}
arma(0.6,0.6,1) %>%
  gg_tsdisplay(plot_type = "partial") + labs(title = "ARMA(1,1) plot")
## Plot variable not specified, automatically selected `y = y`


F. Generate Data from an AR(2) model with phi1 = -0.8, phi2 = 0.3 and sigma^2 = 1

ar2 <- function(phi1, phi2, sigmasq){
  set.seed(10)
  y <- numeric(100)
  ## Pass the standard deviation explicitly (rnorm's second argument is the mean)
  e <- rnorm(100, sd = sqrt(sigmasq))
  ## AR(2): y_t = phi1 * y_{t-1} + phi2 * y_{t-2} + e_t
  for (i in 3:100)
    y[i] <- phi1 * y[i-1] + phi2 * y[i-2] + e[i]
  tsibble(idx = seq_len(100), y = y, index = idx)
}
ar2(-0.8, 0.3, 1) %>%
  gg_tsdisplay(plot_type = "partial") + labs(title = "AR(2)")
## Plot variable not specified, automatically selected `y = y`

G. Graph The Latter Two Series and Compare

The AR(2) series oscillates with ever-increasing amplitude as time goes on, with the ACF showing large alternating spikes. The ARMA(1,1) series, by contrast, fluctuates around a stable level, and its ACF decays quickly towards zero, consistent with a stationary process.
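The growing oscillation is exactly what the AR(2) stationarity conditions predict:

\[ -1 < \phi_2 < 1, \qquad \phi_1 + \phi_2 < 1, \qquad \phi_2 - \phi_1 < 1 \]

With phi1 = -0.8 and phi2 = 0.3 we get phi2 - phi1 = 1.1 > 1, so the process is non-stationary.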


9.7. Consider aus_airpassengers

aus_airpassengers
## # A tsibble: 47 x 2 [1Y]
##     Year Passengers
##    <dbl>      <dbl>
##  1  1970       7.32
##  2  1971       7.33
##  3  1972       7.80
##  4  1973       9.38
##  5  1974      10.7 
##  6  1975      11.1 
##  7  1976      10.9 
##  8  1977      11.3 
##  9  1978      12.1 
## 10  1979      13.0 
## # ℹ 37 more rows

A. Use ARIMA() to find an appropriate model

aus_fit <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(stepwise = ARIMA(Passengers),
        search = ARIMA(Passengers,stepwise = FALSE))
report(aus_fit[1])
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8756
## s.e.   0.0722
## 
## sigma^2 estimated as 4.671:  log likelihood=-87.8
## AIC=179.61   AICc=179.93   BIC=182.99
report(aus_fit[2])
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8756
## s.e.   0.0722
## 
## sigma^2 estimated as 4.671:  log likelihood=-87.8
## AIC=179.61   AICc=179.93   BIC=182.99
glance(aus_fit) %>%
  arrange(AICc) %>%
  select(.model:BIC)
## # A tibble: 2 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 stepwise   4.67   -87.8  180.  180.  183.
## 2 search     4.67   -87.8  180.  180.  183.

Using the automated ARIMA procedure with stepwise search both enabled and disabled, we get exactly the same model, ARIMA(0,2,1), with identical metrics, so there is no difference between the search and stepwise methods here.

aus_fit %>%
  select(search) %>%
  gg_tsresiduals()

Looking at the residual diagnostics, the residuals appear to be white noise.


Portmanteau Test

## Degrees of freedom = p + q = 0 + 1 = 1
augment(aus_fit) |>
  filter(.model=='search') |>
  features(.innov, ljung_box, lag = 10, dof = 1)
## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 search    6.00     0.740

Using the portmanteau test with one degree of freedom, the p-value (0.740) is not significant, telling us that the residuals look like white noise.


Producing Forecasts..

aus_fit |>
  forecast(h=10) |>
  filter(.model=='search') |>
  autoplot(aus_airpassengers) + labs(title = "Forecasts For the next 10 years")


B. Write the model in terms of the backshift operator.

The chosen model is ARIMA(0,2,1) with no constant and an MA(1) coefficient of -0.8756, so in terms of the backshift operator:

\[ (1-B)^2y_t = (1 - 0.88B)\epsilon_t \]

The two differencing factors come from d = 2, and the moving-average coefficient is approximately -0.88; since the fitted model has no constant, there is no c term.


C. Plot forecasts from an ARIMA(0,1,0) model with drift

aus_fit2 <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(
        arima010 = ARIMA(Passengers ~ pdq(0,1,0)))
aus_fit2 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Forecast with ARIMA(0,1,0)")

report(aus_fit2)
## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.3669
## s.e.    0.3319
## 
## sigma^2 estimated as 4.629:  log likelihood=-89.08
## AIC=182.17   AICc=182.48   BIC=185.59

The ARIMA(0,1,0) with drift forecasts continue in a straight line from the last observation at the average historical growth rate, whereas the ARIMA(0,2,1) model's forecasts lean more on the recent trend in the data.


D. Plot forecasts from an ARIMA(2,1,2) model with drift

aus_fit3 <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(
        arima212 = ARIMA(Passengers ~ pdq(2,1,2)))
aus_fit3 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Forecast with ARIMA(2,1,2)")

report(aus_fit3)
## Series: Passengers 
## Model: ARIMA(2,1,2) w/ drift 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2  constant
##       1.4694  -0.5103  -1.5736  0.6780    0.0650
## s.e.  0.3780   0.3558   0.3081  0.2688    0.0294
## 
## sigma^2 estimated as 4.748:  log likelihood=-87.74
## AIC=187.47   AICc=189.94   BIC=197.75

Without the constant..

## NOTE: appending `+ 0` to the response does NOT remove the constant;
## it only modifies the transformation, so drift remains in the model below
aus_fit4 <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(
        arima212 = ARIMA(Passengers + 0 ~ pdq(2,1,2)))
aus_fit4 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Forecast with ARIMA(2,1,2) w/o Constant")

report(aus_fit4)
## Series: Passengers 
## Model: ARIMA(2,1,2) w/ drift 
## Transformation: Passengers + 0 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2  constant
##       1.4694  -0.5103  -1.5736  0.6780    0.0650
## s.e.  0.3780   0.3558   0.3081  0.2688    0.0294
## 
## sigma^2 estimated as 4.748:  log likelihood=-87.74
## AIC=187.47   AICc=189.94   BIC=197.75

The forecasts and estimates are identical to part (d) because `Passengers + 0` did not actually remove the constant: the report still shows "w/ drift" with the same coefficients.
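For reference, a sketch of the corrected syntax: in fable the constant belongs in the model formula, not in the response, so removing it looks like this (fable may warn about a non-stationary AR part and return a null model for this series):

aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(arima212_nc = ARIMA(Passengers ~ 0 + pdq(2,1,2)))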


E. Plot forecasts from an ARIMA(0,2,1) model with a constant

## NOTE: this specification does not actually include a constant;
## with d = 2, ARIMA() excludes the constant by default
aus_fit5 <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(
        arima021 = ARIMA(Passengers ~ pdq(0,2,1)))
aus_fit5 %>%
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Forecast with ARIMA(0,2,1) w/Constant")

report(aus_fit5)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8756
## s.e.   0.0722
## 
## sigma^2 estimated as 4.671:  log likelihood=-87.8
## AIC=179.61   AICc=179.93   BIC=182.99

This model has a lower AICc than the ARIMA(2,1,2) models and its forecasts look similar, but note it is identical to the model from part (a): the constant was never actually included.
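Conversely, to actually include a constant when d = 2 (the point of this part), the syntax is `~ 1 +`, as in the sketch below; fable then warns that such a specification induces a quadratic or higher-order polynomial trend, which is generally discouraged:

aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(arima021_c = ARIMA(Passengers ~ 1 + pdq(0,2,1)))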


9.8 For the United States GDP series (from global_economy)

US <- global_economy %>%
  filter(Country == 'United States')
## Plot the United States GDP
US %>%
  autoplot(GDP) + labs(title = "United States GDP")


A. Find a transformation..

USlambda <- US |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)
US %>%
  autoplot(box_cox(GDP,USlambda))

The lambda value is about 0.2, which is fairly close to a log transformation (lambda = 0) of the data.


B. Fit a suitable ARIMA model

US %>%
  gg_tsdisplay(box_cox(GDP,USlambda),plot_type = "partial")

The data are non-stationary: there is a clear increasing trend and the ACF has significant spikes at every lag. We need to difference the series to make it stationary.


Making the series stationary..

US %>%
  gg_tsdisplay(difference(box_cox(GDP,USlambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

The differenced data now appear stationary, although the ACF still decays towards zero over the first few lags. Glancing at the ACF and PACF, the spike at lag 1 in the ACF suggests an ARIMA(0,1,1), and the spike at lag 1 in the PACF suggests an ARIMA(1,1,0). We will try these two alongside the stepwise and full-search automated models.


Making Our models..

## Create our model..
US_fit <- US %>%
  model(
    arima011 = ARIMA(box_cox(GDP,USlambda) ~ pdq(0,1,1)),
    arima110 = ARIMA(box_cox(GDP,USlambda) ~ pdq(1,1,0)),
    stepwise = ARIMA(box_cox(GDP,USlambda)),
        search = ARIMA(box_cox(GDP,USlambda),stepwise = FALSE)
    
  )
glance(US_fit) %>%
  arrange(AICc) %>%
  select(.model:BIC)
## # A tibble: 4 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 arima110  5479.   -325.  657.  657.  663.
## 2 stepwise  5479.   -325.  657.  657.  663.
## 3 search    5479.   -325.  657.  657.  663.
## 4 arima011  5689.   -326.  659.  659.  665.

It seems arima110 did the best, and both the search and stepwise ARIMA procedures chose the same parameters for p, d, q, so ARIMA(1,1,0) (with drift) is the best model according to AICc.


C. Try Some Other Plausible Models..

We can vary p or q from the chosen model by +1, vary both p and q by +1, or include/exclude the constant c.

US_fit2 <- US %>%
  model(
    arima112 = ARIMA(box_cox(GDP,USlambda) ~ pdq(1,1,2)),
    arima210 = ARIMA(box_cox(GDP,USlambda) ~ pdq(2,1,0)),
    arima211 = ARIMA(box_cox(GDP,USlambda) ~ pdq(2,1,1)),
    stepwise = ARIMA(box_cox(GDP,USlambda)),
        search = ARIMA(box_cox(GDP,USlambda),stepwise = FALSE)
    
  )
glance(US_fit2) %>%
  arrange(AICc) %>%
  select(.model:BIC)
## # A tibble: 5 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 stepwise  5479.   -325.  657.  657.  663.
## 2 search    5479.   -325.  657.  657.  663.
## 3 arima210  5580.   -325.  659.  659.  667.
## 4 arima112  5630.   -325.  660.  661.  670.
## 5 arima211  5647.   -325.  660.  661.  671.

Changing the orders only made the AICc values worse, so we stick with the better model, ARIMA(1,1,0).


D. Choose Best Model and Diagnostics.

US_fit %>%
  select(arima110) %>%
  gg_tsresiduals()

The diagnostics look good: there are no significant spikes in the residual ACF, although the histogram of residuals looks slightly skewed.


Portmanteau Test

## Degrees of freedom = p + q = 1 + 0 = 1
augment(US_fit) |>
  filter(.model=='arima110') |>
  features(.innov, ljung_box, lag = 10, dof = 1)
## # A tibble: 1 × 4
##   Country       .model   lb_stat lb_pvalue
##   <fct>         <chr>      <dbl>     <dbl>
## 1 United States arima110    3.81     0.923

We get a p-value of about 0.92, so the residuals look like white noise (nice)


E. Produce forecasts of your fitted models..

US_fit %>%
  forecast(h = 10,level = NULL) %>%
  autoplot(US) + labs(title = "US GDP Forecast")

The forecasts appear reasonable, continuing the series' increasing trend.


F. Compare the results with what you would obtain using ETS() (with no transformation)

US_fit3 <- US |>
  model(ETS(GDP),
        arima011 = ARIMA(GDP ~ pdq(0,1,1)))
glance(US_fit3) %>%
  arrange(AICc) %>%
  select(.model:BIC)
## # A tibble: 2 × 6
##   .model     sigma2 log_lik   AIC  AICc   BIC
##   <chr>       <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 arima011 7.74e+22  -1583. 3170. 3170. 3174.
## 2 ETS(GDP) 6.78e- 4  -1590. 3191. 3192. 3201.

On AICc, the ARIMA model appears slightly better than the ETS model (with no transformation applied). However, AICc values are not comparable between ARIMA and ETS models because they belong to different model classes, so this comparison should not be used to choose between them.
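A fairer comparison is out-of-sample accuracy. Below is a minimal sketch using time-series cross-validation (the .init = 20 starting window is an arbitrary choice here, not from the original analysis; output not shown):

US |>
  stretch_tsibble(.init = 20, .step = 1) |>
  model(ets = ETS(GDP), arima = ARIMA(GDP)) |>
  forecast(h = 1) |>
  accuracy(US) |>
  select(.model, RMSE, MAE)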


Fin