Install libraries
The first plot suggests significant correlation, indicating that the data is not white noise. The second plot shows diminished correlation so it indicates the data is close to white noise. The third plot shows no significant correlation, so it indicates the data is white noise.
amazon_stock <- subset(gafa_stock,Symbol == "AMZN")
as_tsibble(amazon_stock,index = Date,key = Symbol)## # A tsibble: 1,258 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMZN 2014-01-02 399. 399. 394. 398. 398. 2137800
## 2 AMZN 2014-01-03 398. 403. 396. 396. 396. 2210200
## 3 AMZN 2014-01-06 396. 397 388. 394. 394. 3170600
## 4 AMZN 2014-01-07 395. 398. 394. 398. 398. 1916000
## 5 AMZN 2014-01-08 398. 403 396. 402. 402. 2316500
## 6 AMZN 2014-01-09 404. 407. 398. 401. 401. 2103000
## 7 AMZN 2014-01-10 403. 404. 394. 398. 398. 2679500
## 8 AMZN 2014-01-13 398. 400. 388. 391. 391. 2844900
## 9 AMZN 2014-01-14 392. 399. 391. 398. 398. 2340100
## 10 AMZN 2014-01-15 399. 399. 393. 396. 396. 2678300
## # ℹ 1,248 more rows
This plot is non stationary because the graph is increasing over time
and there isn’t a constant mean.
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
The ACF plot further demonstrates the data is not stationary because
each lag is outside of the confidence interval. The first value in the
PACF is an outlier, which indicated the data may only need first-order
differencing.
a Turkish GDP from global_economy.
lambda_turkey <- turkey %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
# Apply the Box-Cox transformation to the GDP column in the tsibble
turkey <- turkey %>%
mutate(GDP_transformed = box_cox(GDP, lambda_turkey))
# Create the plots
original_plot <- autoplot(turkey, GDP) +
labs(title = "Time Series of Turkey's GDP") +
theme(plot.title = element_text(hjust = 0.5))
transformed_plot <- autoplot(turkey, GDP_transformed) +
labs(title = "Box-Cox Transformed GDP Series") +
theme(plot.title = element_text(hjust = 0.5))
# Arrange the plots vertically
ggarrange(original_plot, transformed_plot, ncol = 1)# Calculate the first and second-order differences on the Box-Cox transformed GDP series
turkey_diff <- turkey %>%
mutate(
GDP_d1 = difference(GDP_transformed),
GDP_d2 = difference(GDP_transformed, differences = 2)
)
# Create the plots
transformed_plot_turkey <- autoplot(turkey, GDP_transformed) +
labs(title = "Box-Cox Transformed GDP Series") +
theme(plot.title = element_text(hjust = 0.5))
first_diff_plot <- autoplot(turkey_diff, GDP_d1) +
labs(title = "First Order Differencing") +
theme(plot.title = element_text(hjust = 0.5))
second_diff_plot <- autoplot(turkey_diff, GDP_d2) +
labs(title = "Second Order Differencing") +
theme(plot.title = element_text(hjust = 0.5))
# Arrange the plots vertically
ggarrange(transformed_plot_turkey, first_diff_plot, second_diff_plot, ncol = 1)## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
It seems as though the first order differencing was sufficient, with a
box-cox transformation (lambda = 0.1572).
lambda_tasmania <- tasmania %>%
features(Takings, guerrero) %>%
pull(lambda_guerrero)
# Apply the Box-Cox transformation to the Takings column
tasmania <- tasmania %>%
mutate(Takings_transformed = box_cox(Takings, lambda_tasmania))## # A tibble: 1 × 2
## State nsdiffs
## <chr> <int>
## 1 Tasmania 1
original_plot <- autoplot(tasmania, Takings) +
labs(title = "Time Series of Tasmania's Accommodation Takings") +
theme(plot.title = element_text(hjust = 0.5))
transformed_plot_tasmania <- autoplot(tasmania, Takings_transformed) +
labs(title = "Box-Cox Transformed Takings Series") +
theme(plot.title = element_text(hjust = 0.5))
ggarrange(original_plot, transformed_plot_tasmania, ncol = 1)
The data still doesn’t look stationary and the unit root test indicates
we need one order of seasonal differencing.
tasmania %>%
gg_tsdisplay(difference(Takings_transformed,4),plot_type='partial',lag = 12)+
labs(title = "Seasonally Differenced Box-Cox Takings Series",y="")## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
The ACF plot and PACF still has significant spikes at lag 1. We still
need differencing to achieve stationary data.
tasmania %>%
gg_tsdisplay(difference(Takings_seasonal_diff),plot_type='partial',lag = 12)+
labs(title = "First Order Diff, Seasonally Differenced Box-Cox Transformed Takings Series",y="")## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
The goal is to achieve a stationary series, so for this set of data, the Box-Cox transformed Takings series (with lambda = 0.0018) required one round of seasonal differencing, followed by one round of regular differencing to achieve stationarity in the Tasmania dataset.
# plot
souvenirs %>%
gg_tsdisplay(Sales, plot_type='partial', lag = 36) +
labs(title = "Non-transformed Monthly Souvenir Sales")# calculate lambda
lambda_souvenirs <- souvenirs %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
souvenirs %>%
gg_tsdisplay(difference(box_cox(Sales, lambda_souvenirs), 12), plot_type = 'partial', lag = 36) +
labs(title = TeX(paste0("Differenced Monthly Souvenir Sales with $\\lambda$ = ",
round(lambda_souvenirs, 2))))## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
gg_tsdisplay(Turnover, plot_type='partial', lag = 36) +
labs(title = "Non-transformed Retail Turnover")lambda_aus <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
# unit root test
myseries %>%
features(box_cox(Turnover, lambda_aus), unitroot_nsdiffs) ## # A tibble: 1 × 3
## State Industry nsdiffs
## <chr> <chr> <int>
## 1 Queensland Cafes, restaurants and takeaway food services 1
| ### 9.6 Simulate and plot some data from simple ARIMA models. |
|---|
| ### 9.7. Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011. |
| a. Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods. |
| ```r air_model <- aus_airpassengers %>% filter(Year < 2012) %>% model(ARIMA(Passengers)) |
| report(air_model) ``` |
## Series: Passengers ## Model: ARIMA(0,2,1) ## ## Coefficients: ## ma1 ## -0.8756 ## s.e. 0.0722 ## ## sigma^2 estimated as 4.671: log likelihood=-87.8 ## AIC=179.61 AICc=179.93 BIC=182.99 |
r air_model %>% forecast(h=10) %>% autoplot(aus_airpassengers) + labs(title = "Australian Aircraft Passengers with ARIMA(0,2,1)", y = "Passengers (in millions)") |
r air_model %>% gg_tsresiduals() + labs(title = "Residuals ARIMA(0,2,1)") |
| b. Write the model in terms of the backshift operator. |
| 𝑦𝑡=−0.876𝜖𝑡−1+𝜖𝑡 |
| c. Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a. |
| ```r a_mod_2 <-aus_airpassengers %>% filter(Year < 2012) %>% model(ARIMA(Passengers ~ pdq(0,1,0))) |
| a_mod_2 %>% forecast(h=10) %>% autoplot(aus_airpassengers) + labs(title = “Australian Aircraft Passengers with ARIMA(0,1,0)”, y = “Passengers (in millions)”) ``` |
r a_mod_2 %>% gg_tsresiduals() + labs(title = "Residuals for ARIMA(0,1,0)") |
| d. Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens. |
| ```r a_mod_3 <-aus_airpassengers %>% filter(Year < 2012) %>% model(ARIMA(Passengers ~ pdq(2,1,2))) |
| a_mod_3 %>% forecast(h=10) %>% autoplot(aus_airpassengers) + labs(title = “Australian Aircraft Passengers with ARIMA(2,1,2)”, y = “Passengers (in millions)”) ``` |
r a_mod_3 %>% gg_tsresiduals() + labs(title = "Residuals for ARIMA(2,1,2)") |
r #removing constant a_mod_4 <-aus_airpassengers %>% filter(Year < 2012) %>% model(ARIMA(Passengers ~ 0 + pdq(2,1,2))) |
## Warning: 1 error encountered for ARIMA(Passengers ~ 0 + pdq(2, 1, 2)) ## [1] non-stationary AR part from CSS
A warning pops up: “Warning: 1 error encountered for ARIMA(Passengers ~
0 + pdq(2, 1, 2)) [1] non-stationary AR part from CSS” and the slope
increases. |
| e. Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens? |
r a_mod_5 <-aus_airpassengers %>% filter(Year < 2012) %>% model(ARIMA(Passengers ~ 1 + pdq(0,2,1))) |
## Warning: Model specification induces a quadratic or higher order polynomial trend. ## This is generally discouraged, consider removing the constant or reducing the number of differences.
A warning pops up: The model induces a higher order polynomial trend
that is discouraged. It advises to remove the constant. |
global_economy %>%
filter(Code == "USA") %>%
gg_tsdisplay(GDP, plot_type='partial') +
labs(title = "Non-transformed United States GDP")lambda_usa <-global_economy %>%
filter(Code == "USA") %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)a_mod_110 <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda_usa)))
report(a_mod_110)## Series: GDP
## Model: ARIMA(1,1,0) w/ drift
## Transformation: box_cox(GDP, lambda_usa)
##
## Coefficients:
## ar1 constant
## 0.4586 118.1822
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5479: log likelihood=-325.32
## AIC=656.65 AICc=657.1 BIC=662.78
# Fit other models
a_mod_210 <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda_usa) ~ pdq(2,1,0)))
# report(a_mod_210)
a_mod_111 <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda_usa) ~ pdq(1,1,1)))
# report(a_mod_111)
a_mod_211 <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda_usa) ~ pdq(2,1,1)))
report(a_mod_211)## Series: GDP
## Model: ARIMA(2,1,1) w/ drift
## Transformation: box_cox(GDP, lambda_usa)
##
## Coefficients:
## ar1 ar2 ma1 constant
## 1.1661 -0.2792 -0.7356 24.3346
## s.e. 0.3418 0.2108 0.3077 2.5572
##
## sigma^2 estimated as 5647: log likelihood=-325.14
## AIC=660.29 AICc=661.46 BIC=670.5
Based on the residual plots, The ARIMA(2,1,1) model is the best out of the 4.The residuals here are the closest to a normal distribution.
a_mod_211 %>%
forecast(h = 10) %>%
autoplot(global_economy %>% filter(Code == "USA")) +
labs(title = "Forecast for US GDP with ARIMA(2,1,1)", y = "GDP")The forecasts seem reasonable.
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089
This ETS model has a higher AIC, AICc and BIC than the ARIMA(2,1,1)
model, indicating that the ARIMA model has a better fit.