## Import the following libraries
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
The difference between the three figures is that as the number of random numbers increases, the sample autocorrelations shrink towards zero. None of the plots show significant spikes, indicating that each series is white noise.
The critical values sit at different distances from zero because they are computed as ±1.96/√T: as we sample more and more random numbers, the bounds become narrower and the spikes smaller. The autocorrelations themselves differ across the figures because each series is an independent random sample; since the data are white noise, any spikes appear at random positions rather than in identical places across the plots.
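A minimal sketch (my addition, not the original code) that reproduces ACF plots for white-noise series of increasing length; the significance bounds narrow as the sample size grows:
## Simulate white noise of a given length and return it as a tsibble
wn <- function(n) {
  tsibble(idx = seq_len(n), y = rnorm(n), index = idx)
}
set.seed(30)
wn(36) %>% ACF(y) %>% autoplot() + labs(title = "White noise, n = 36")
wn(360) %>% ACF(y) %>% autoplot() + labs(title = "White noise, n = 360")
wn(1000) %>% ACF(y) %>% autoplot() + labs(title = "White noise, n = 1000")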
gafa_stock %>%
filter(Symbol == "AMZN") %>%
autoplot(Close) + labs(title = "Close Prices Of Amazon Stocks")
The data displays an increasing trend with some seasonality involved.
## First we re-index the data
amzn_stock <- gafa_stock |>
filter(Symbol == "AMZN") |>
mutate(day = row_number()) |>
update_tsibble(index = day, regular = TRUE) %>%
select(Close)
amzn_stock %>%
gg_tsdisplay(plot_type = "partial")
## Plot variable not specified, automatically selected `y = Close`
According to the plots of the Amazon closing prices, the series has an increasing trend with some seasonality, which tells us the data is non-stationary. Glancing at the ACF and PACF, we see significant spikes at every lag in the ACF plot as well as some smaller significant spikes in the PACF plot. Because of this, we can difference the series (first- or, if needed, second-order) to make it stationary so that the residuals look like white noise.
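As a hedged follow-up (my addition, not part of the original code), we can take a first difference of the re-indexed closing price and inspect the display again:
amzn_stock %>%
  gg_tsdisplay(difference(Close), plot_type = "partial")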
Turkey_GDP <- global_economy %>%
filter(Country == "Turkey")
Turkey_GDP %>%
autoplot(GDP) + labs(title = "Turkey GDP")
lambda <- Turkey_GDP |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
## Looks similar to a log transformation
Turkey_GDP %>%
autoplot(box_cox(GDP,lambda)) + labs(title = "GDP Transformed (Box-Cox)")
Turkey_GDP %>%
ACF(box_cox(GDP,lambda)) %>%
autoplot() + labs(title = "AMZN Closing Stock Price (logged)")
After the transformation the ACF still shows strongly significant spikes at the early lags, so further differencing is needed.
## Applying The Difference
Turkey_GDP %>%
gg_tsdisplay(difference(box_cox(GDP,lambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
It seems that applying a single first difference has turned the transformed Turkey GDP series into something resembling white noise.
## Use a lag value of 10
Turkey_GDP |>
mutate(diff_GDP = difference(box_cox(GDP,lambda))) |>
features(diff_GDP, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 5.84 0.829
Since the p-value (0.83) is larger than 0.05, we fail to reject the null hypothesis of the Ljung-Box test: the differenced series resembles white noise and can be treated as stationary.
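As a quick cross-check (my addition, not part of the original code), unitroot_ndiffs reports how many first differences the transformed series needs; a value of 1 would corroborate the result above.
Turkey_GDP |>
  features(box_cox(GDP, lambda), unitroot_ndiffs)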
Tasmania <- aus_accommodation %>%
filter(State == "Tasmania")
Tasmania %>%
autoplot(Takings) + labs(title = "Tasmania Takings")
The time series shows an increasing trend, increasing variance, and seasonality.
Tlambda <- Tasmania |>
features(Takings, features = guerrero) |>
pull(lambda_guerrero)
## Looks similar to a log transformation
Tasmania %>%
autoplot(box_cox(Takings,Tlambda)) + labs(title = "Takings Transformed (Box-Cox)")
The variance now appears stable across the whole chart; the transformation is again close to a log.
Tasmania %>%
ACF(box_cox(Takings,Tlambda)) %>%
autoplot() + labs(title = "Tasmania Takings (logged)")
There appear to be significant spikes at every other lag.
We apply a seasonal difference (note that Takings is quarterly, so the usual seasonal lag would be 4; the code below uses lag = 12).
Tasmania %>%
gg_tsdisplay(difference(box_cox(Takings,Tlambda),12),plot_type = "partial")
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).
The ACF shows some spikes crossing the significance bounds, and the PACF shows a large significant spike at the second lag.
## Use a lag value of 10
Tasmania |>
mutate(diff_Tak = difference(box_cox(Takings,Tlambda),12)) |>
features(diff_Tak, ljung_box, lag = 10)
## # A tibble: 1 × 3
## State lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 234. 0
The p-value is essentially zero (well below 0.05), which means the seasonally differenced series is still not white noise, so we apply an additional first-order difference.
Tasmania %>%
gg_tsdisplay(difference(difference(box_cox(Takings,Tlambda),12),1),plot_type = "partial")
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).
## Use a lag value of 10
Tasmania |>
mutate(diff_Tak = difference(difference(box_cox(Takings,Tlambda),12),1)) |>
features(diff_Tak, ljung_box, lag = 10)
## # A tibble: 1 × 3
## State lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 10.4 0.409
After applying a seasonal difference followed by a first-order difference, the p-value (0.41) is no longer significant, so the series now resembles white noise.
souvenirs
## # A tsibble: 84 x 2 [1M]
## Month Sales
## <mth> <dbl>
## 1 1987 Jan 1665.
## 2 1987 Feb 2398.
## 3 1987 Mar 2841.
## 4 1987 Apr 3547.
## 5 1987 May 3753.
## 6 1987 Jun 3715.
## 7 1987 Jul 4350.
## 8 1987 Aug 3566.
## 9 1987 Sep 5022.
## 10 1987 Oct 6423.
## # ℹ 74 more rows
souvenirs %>%
autoplot(Sales) + labs(title ="Souvenirs Sales")
Slambda <- souvenirs |>
features(Sales, features = guerrero) |>
pull(lambda_guerrero)
souvenirs %>%
autoplot(box_cox(Sales,Slambda)) + labs(title ="Souvenirs Sales (Box_Cox)")
Applying the Box-Cox transformation has evened out the trend shape and stabilized the seasonal variation.
souvenirs %>%
ACF(box_cox(Sales,Slambda)) %>%
autoplot() + labs(title = "Souvenirs Sales (logged)")
There appear to be several significant lags in the ACF plot before we difference the series.
souvenirs %>%
gg_tsdisplay(difference(box_cox(Sales,Slambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Use a lag value of 10
souvenirs |>
mutate(diff_Sales = difference(box_cox(Sales,Slambda))) |>
features(diff_Sales, ljung_box, lag = 10)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 30.1 0.000828
The p-value of 0.0008 is significant, telling us the series is still not stationary after a single first difference; we next try a seasonal difference followed by a first difference.
souvenirs %>%
gg_tsdisplay(difference(difference(box_cox(Sales,Slambda),12),1),plot_type = "partial")
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).
souvenirs |>
mutate(diff_Sales = difference(difference(box_cox(Sales,Slambda),12),1)) |>
features(diff_Sales, ljung_box, lag = 10)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 22.7 0.0120
The portmanteau statistic has come down, but the p-value (0.012) is still significant, meaning we might have to apply further differencing to make the data stationary.
souvenirs |>
features(box_cox(Sales,Slambda),unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.79 0.01
souvenirs |>
features(difference(box_cox(Sales,Slambda)),unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0631 0.1
Looking at the KPSS test, once we apply a single difference the p-value is 0.1, which according to the textbook means the differenced data appears stationary. (But then why does the portmanteau test give a small p-value in this case?) The two tests answer different questions: the KPSS test checks for a unit root, i.e. whether the level of the series is stationary, while the Ljung-Box test checks for any remaining autocorrelation. A differenced series can pass the KPSS test yet still show autocorrelation, here most likely from the seasonal pattern that a single first difference does not remove.
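To probe the leftover seasonality (my addition, not part of the original code), unitroot_nsdiffs reports how many seasonal differences the transformed series needs:
souvenirs |>
  features(box_cox(Sales, Slambda), unitroot_nsdiffs)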
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot(Turnover) + labs(title = "Retail Data")
The time series has an increasing trend and a strong seasonality.
Saleslambda <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
Applying The Lambda value
myseries %>%
autoplot(box_cox(Turnover,Saleslambda)) + labs(title = "Box_Cox Sales")
Applying the transformation has again stabilized the variance.
myseries %>%
gg_tsdisplay(box_cox(Turnover,Saleslambda),plot_type = "partial")
The ACF shows significant spikes at every lag, as well as some on the PACF.
## The lag parameter displays lagged values up to lag 36.
myseries %>%
gg_tsdisplay(difference(box_cox(Turnover,Saleslambda),12),plot_type = "partial",lag =36)
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).
The ACF is decaying slowly towards zero and there are significant spikes at every 12th lag in the PACF, so the data is clearly still non-stationary. We should take a further first difference.
myseries %>%
gg_tsdisplay(difference(box_cox(Turnover,Saleslambda),12) |> difference(),plot_type = "partial",lag = 36)
## Warning: Removed 13 rows containing missing values (`geom_line()`).
## Warning: Removed 13 rows containing missing values (`geom_point()`).
myseries |>
features(box_cox(Turnover,Saleslambda),unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Northern Territory Clothing, footwear and personal accessory retailing 1
myseries |>
mutate(diff_close = difference(box_cox(Turnover,Saleslambda),12) |> difference()) |>
features(diff_close, unitroot_kpss)
## # A tibble: 1 × 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal acce… 0.0165 0.1
According to the KPSS unit-root test, after applying this second difference the p-value is 0.1, so we can conclude that the doubly differenced data appears stationary.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
## Simulate an AR(1) series: y_t = phi * y_{t-1} + e_t, with e_t ~ N(0, sigmasq)
ari <- function(phi, sigmasq){
  set.seed(10)
  y <- numeric(100)
  e <- rnorm(100, mean = 0, sd = sqrt(sigmasq))
  for(i in 2:100){
    ## AR(1) recursion
    y[i] <- phi * y[i-1] + e[i]
  }
  sim <- tsibble(idx = seq_len(100), y = y, index = idx)
  return(sim)
}
phi <- 0.6
sigma2 <- 1
ari(0.6,1) %>%
autoplot() + labs(title = "Time Series with phi = 0.6")
## Plot variable not specified, automatically selected `.vars = y`
ari(0.7,1) %>%
autoplot() + labs(title = "Time Series with phi = 0.7")
## Plot variable not specified, automatically selected `.vars = y`
ari(0.9,1) %>%
autoplot() + labs(title = "Time Series with phi = 0.9")
## Plot variable not specified, automatically selected `.vars = y`
ari(0.3,1) %>%
autoplot() + labs(title = "Time Series with phi = 0.3")
## Plot variable not specified, automatically selected `.vars = y`
ari(0.2,1) %>%
autoplot() + labs(title = "Time Series with phi = 0.2")
## Plot variable not specified, automatically selected `.vars = y`
It seems that for higher values of phi the series displays less rapid cyclic behavior, whereas for lower values of phi the oscillations become sharper and more frequent.
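For a side-by-side view (my sketch, using the ari() helper defined above), the series for several phi values can be stacked in one faceted plot:
phis <- c(0.2, 0.6, 0.9)
sims <- bind_rows(lapply(phis, function(p) {
  ari(p, 1) %>% as_tibble() %>% mutate(phi = p)
}))
ggplot(sims, aes(idx, y)) +
  geom_line() +
  facet_grid(phi ~ ., labeller = label_both) +
  labs(title = "AR(1) simulations for several values of phi")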
## Simulate an MA(1) series: y_t = e_t + theta * e_{t-1}, with e_t ~ N(0, sigmasq)
mov_avg <- function(theta, sigmasq){
  set.seed(15)
  y <- numeric(100)
  e <- rnorm(100, mean = 0, sd = sqrt(sigmasq))
  for (i in 2:100){
    ## MA(1) recursion
    y[i] <- e[i] + theta * e[i-1]
  }
  sim <- tsibble(idx = seq_len(100), y = y, index = idx)
  return(sim)
}
mov_avg(0.6,1) %>%
autoplot() + labs(title = "Moving Average Time Plot with Theta = 0.6")
## Plot variable not specified, automatically selected `.vars = y`
mov_avg(0.9,1) %>%
autoplot() + labs(title = "Moving Average with theta = 0.9")
## Plot variable not specified, automatically selected `.vars = y`
mov_avg(0.1,1) %>%
autoplot() + labs(title = "Moving Average with theta = 0.1")
## Plot variable not specified, automatically selected `.vars = y`
It’s difficult to tell, but for lower values of theta there are more sharp peaks and dips, while as theta increases the peaks become less pronounced.
## Simulate an ARMA(1,1) series: y_t = phi * y_{t-1} + e_t + theta * e_{t-1}
arma <- function(theta, phi, sigmasq){
  set.seed(10)
  y <- numeric(100)
  e <- rnorm(100, mean = 0, sd = sqrt(sigmasq))
  for (i in 2:100){
    ## Combine the AR(1) and MA(1) recursions
    y[i] <- e[i] + theta * e[i-1] + phi * y[i-1]
  }
  sim <- tsibble(idx = seq_len(100), y = y, index = idx)
  return(sim)
}
arma(0.6,0.6,1) %>%
gg_tsdisplay(plot_type = "partial") + labs(title = "ARMA(1,1) plot")
## Plot variable not specified, automatically selected `y = y`
## Simulate an AR(2) series: y_t = phi1 * y_{t-1} + phi2 * y_{t-2} + e_t
## (the plot below is titled AR(2), so the recursion uses lagged y, not lagged e)
arma2 <- function(phi1, phi2, sigmasq){
  set.seed(10)
  y <- numeric(100)
  e <- rnorm(100, mean = 0, sd = sqrt(sigmasq))
  for (i in 3:100){
    ## AR(2) recursion
    y[i] <- phi1 * y[i-1] + phi2 * y[i-2] + e[i]
  }
  sim <- tsibble(idx = seq_len(100), y = y, index = idx)
  return(sim)
}
arma2(-0.8,0.3,1) %>%
gg_tsdisplay(plot_type = "partial") + labs(title = "AR(2)")
## Plot variable not specified, automatically selected `y = y`
The arma2 (AR(2)) series oscillates with increasing variance as time goes on, and its ACF shows significant spikes at various lags, while the ARMA(1,1) series shows a slight upward trend with an ACF that decays towards zero, so that plot does not appear stationary either.
aus_airpassengers
## # A tsibble: 47 x 2 [1Y]
## Year Passengers
## <dbl> <dbl>
## 1 1970 7.32
## 2 1971 7.33
## 3 1972 7.80
## 4 1973 9.38
## 5 1974 10.7
## 6 1975 11.1
## 7 1976 10.9
## 8 1977 11.3
## 9 1978 12.1
## 10 1979 13.0
## # ℹ 37 more rows
aus_fit <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(stepwise = ARIMA(Passengers),
search = ARIMA(Passengers,stepwise = FALSE))
report(aus_fit[1])
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
report(aus_fit[2])
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
glance(aus_fit) %>%
arrange(AICc) %>%
select(.model:BIC)
## # A tibble: 2 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stepwise 4.67 -87.8 180. 180. 183.
## 2 search 4.67 -87.8 180. 180. 183.
Using the automatic ARIMA() selection with the stepwise search both enabled and disabled, we get the same model and the same metrics, so there appears to be no difference between the search and stepwise methods here.
aus_fit %>%
select(search) %>%
gg_tsresiduals()
Looking at the residual diagnostics, the residuals appear to resemble white noise.
## degrees of freedom equal p + q, so 0 + 1 = 1
augment(aus_fit) |>
filter(.model=='search') |>
features(.innov, ljung_box, lag = 10, dof = 1)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 search 6.00 0.740
Using the portmanteau (Ljung-Box) test with 1 degree of freedom, the p-value is not significant, telling us that the residuals look like white noise.
aus_fit |>
forecast(h=10) |>
filter(.model=='search') |>
autoplot(aus_airpassengers) + labs(title = "Forecasts For the next 10 years")
After rereading the chapter, the chosen ARIMA(0,2,1) model written with the backshift operator is
\[ (1-B)^2y_t = (1 - 0.88B)\epsilon_t \]
since d = 2 gives the squared differencing term and the estimated MA(1) coefficient is -0.88; the fitted model includes no constant.
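Expanding the backshift polynomial (my own working, for clarity) gives the equivalent difference-equation form:
\[ y_t = 2y_{t-1} - y_{t-2} + \epsilon_t - 0.88\epsilon_{t-1} \]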
aus_fit2 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(
arima010 = ARIMA(Passengers ~ pdq(0,1,0)))
aus_fit2 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecast with ARIMA(0,1,0)")
report(aus_fit2)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.3669
## s.e. 0.3319
##
## sigma^2 estimated as 4.629: log likelihood=-89.08
## AIC=182.17 AICc=182.48 BIC=185.59
The forecasts from ARIMA(0,1,0) with drift appear to be driven by the most recent observation, whereas in the automatically selected ARIMA slightly older observations also influence the forecast.
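This makes sense: ARIMA(0,1,0) with drift is a random walk with drift, so the point forecasts follow a straight line from the last observation (my own working, where c is the estimated drift):
\[ \hat{y}_{T+h|T} = y_T + c\,h \]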
aus_fit3 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(
arima212 = ARIMA(Passengers ~ pdq(2,1,2)))
aus_fit3 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecast with ARIMA(2,1,2)")
report(aus_fit3)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## 1.4694 -0.5103 -1.5736 0.6780 0.0650
## s.e. 0.3780 0.3558 0.3081 0.2688 0.0294
##
## sigma^2 estimated as 4.748: log likelihood=-87.74
## AIC=187.47 AICc=189.94 BIC=197.75
## Try to exclude the constant by adding a zero (see the note below the report)
aus_fit4 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(
arima212 = ARIMA(Passengers + 0 ~ pdq(2,1,2)))
aus_fit4 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecast with ARIMA(2,1,2) w/o Constant")
report(aus_fit4)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
## Transformation: Passengers + 0
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## 1.4694 -0.5103 -1.5736 0.6780 0.0650
## s.e. 0.3780 0.3558 0.3081 0.2688 0.0294
##
## sigma^2 estimated as 4.748: log likelihood=-87.74
## AIC=187.47 AICc=189.94 BIC=197.75
Judging by the report, the constant was not actually removed: the model is still fitted with drift, because adding 0 to the response is treated as a transformation (Passengers + 0) rather than as dropping the constant, which is why the results are identical to the previous fit. In fable the constant is excluded by putting the 0 on the right-hand side of the model formula.
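A hedged sketch of the intended specification (my addition, not run above), with the 0 on the model side of the formula:
aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(arima212_noc = ARIMA(Passengers ~ 0 + pdq(2,1,2))) %>%
  report()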
## Fit the ARIMA(0,2,1) model chosen earlier by the automatic search
aus_fit5 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(
arima021 = ARIMA(Passengers ~ pdq(0,2,1)))
aus_fit5 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecast with ARIMA(0,2,1) w/Constant")
report(aus_fit5)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
This model has a lower AICc than the ARIMA(2,1,2) fits above, but the forecasts look pretty similar to the previous ones. (Note that the report shows no constant was actually estimated for this ARIMA(0,2,1) fit.)
US <- global_economy %>%
filter(Country == 'United States')
## Plot the United States GDP
US %>%
autoplot(GDP) + labs(title = "United States GDP")
USlambda <- US |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
US %>%
autoplot(box_cox(GDP,USlambda))
The lambda value is about 0.2, which is pretty close to a log transformation of the data.
US %>%
gg_tsdisplay(box_cox(GDP,USlambda),plot_type = "partial")
The data appears to be non-stationary: there is an increasing trend in the plot and the ACF has significant spikes at every lag. Given these plots, we need to make the data stationary.
US %>%
gg_tsdisplay(difference(box_cox(GDP,USlambda)),plot_type = "partial")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
The data now appears to be stationary, even though the ACF seems to decay towards zero. Glancing at the ACF and PACF plots, we could propose an ARIMA(0,1,1) from the ACF and an ARIMA(1,1,0) from the PACF. We will try these two alongside the stepwise and automatic search models.
## Create our model..
US_fit <- US %>%
model(
arima011 = ARIMA(box_cox(GDP,USlambda) ~ pdq(0,1,1)),
arima110 = ARIMA(box_cox(GDP,USlambda) ~ pdq(1,1,0)),
stepwise = ARIMA(box_cox(GDP,USlambda)),
search = ARIMA(box_cox(GDP,USlambda),stepwise = FALSE)
)
glance(US_fit) %>%
arrange(AICc) %>%
select(.model:BIC)
## # A tibble: 4 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 5479. -325. 657. 657. 663.
## 2 stepwise 5479. -325. 657. 657. 663.
## 3 search 5479. -325. 657. 657. 663.
## 4 arima011 5689. -326. 659. 659. 665.
Wow, it seems arima110 did the best, and both the stepwise and full-search ARIMA procedures chose the same (p,d,q). So ARIMA(1,1,0) is the best model according to the AICc.
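To see the fitted coefficients of the preferred model (my addition, not part of the original output):
US_fit %>%
  select(arima110) %>%
  report()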
We can vary p or q from our chosen model by ±1, vary both p and q by ±1, or include/exclude the constant c from the current model.
US_fit2 <- US %>%
model(
arima112 = ARIMA(box_cox(GDP,USlambda) ~ pdq(1,1,2)),
arima210 = ARIMA(box_cox(GDP,USlambda) ~ pdq(2,1,0)),
arima211 = ARIMA(box_cox(GDP,USlambda) ~ pdq(2,1,1)),
stepwise = ARIMA(box_cox(GDP,USlambda)),
search = ARIMA(box_cox(GDP,USlambda),stepwise = FALSE)
)
glance(US_fit2) %>%
arrange(AICc) %>%
select(.model:BIC)
## # A tibble: 5 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stepwise 5479. -325. 657. 657. 663.
## 2 search 5479. -325. 657. 657. 663.
## 3 arima210 5580. -325. 659. 659. 667.
## 4 arima112 5630. -325. 660. 661. 670.
## 5 arima211 5647. -325. 660. 661. 671.
It seems these variations only made the AICc values worse, so we stick with the better ARIMA(1,1,0) model.
US_fit %>%
select(arima110) %>%
gg_tsresiduals()
The diagnostics look good: there are no significant spikes in the residual ACF, although the histogram of the residuals looks slightly skewed.
## degrees of freedom equal p + q, so 1 + 0 = 1
augment(US_fit) |>
filter(.model=='arima110') |>
features(.innov, ljung_box, lag = 10, dof = 1)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States arima110 3.81 0.923
We get a p-value of 0.92, so the residuals look like white noise (nice).
US_fit %>%
forecast(h = 10,level = NULL) %>%
autoplot(US) + labs(title = "US GDP Forecast")
The forecasts appear reasonable since the data displays an increasing trend.
US_fit3 <- US |>
model(ETS(GDP),
arima011 = ARIMA(GDP ~ pdq(0,1,1)))
glance(US_fit3) %>%
arrange(AICc) %>%
select(.model:BIC)
## # A tibble: 2 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima011 7.74e+22 -1583. 3170. 3170. 3174.
## 2 ETS(GDP) 6.78e- 4 -1590. 3191. 3192. 3201.
AICc-wise, the ARIMA model appears to perform slightly better than the ETS model, with no transformation applied. Strictly, though, AICc values are only comparable within the same model class, so ARIMA and ETS are better compared on out-of-sample accuracy than on information criteria.
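A hedged sketch of such a comparison (my addition; the split year 2010 is an assumption), using a hold-out test set and accuracy():
US_train <- US %>% filter(Year <= 2010)
US_train %>%
  model(ets = ETS(GDP),
        arima011 = ARIMA(GDP ~ pdq(0,1,1))) %>%
  forecast(h = "7 years") %>%
  accuracy(US) %>%
  select(.model, RMSE, MAE, MAPE)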