For each of the graphs, the data appears to be white-noise, based on the fact that non of the lags are in the statistially significant region.
A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
amzn_stock <- gafa_stock %>%
filter(Symbol == 'AMZN')
amzn_stock %>%
autoplot(Close)
amzn_stock %>%
gg_tsdisplay(Close, plot_type = 'partial')
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
When looking at the plot of the stock’s closing price, we see that the
price is increasing as time goes on and thus would result in a mean
Close that is increasing as a function of time. Additionally, when
looking at the ACF chart, we see that there’s is statistically
significant autocorrelation in the time series throughout the entire
set.
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
For this plot, the Box-Cox transformation resulted in a lambda = 0
turkey_data <- global_economy %>%
filter(Country == 'Turkey')
turkey_data %>% autoplot(GDP)
bc <- BoxCoxTrans(turkey_data$GDP)
(lambda <- bc$lambda)
## [1] 0
turkey_data %>%
mutate(GDP_transformed = log(GDP)) %>%
autoplot(GDP_transformed)
turkey_data %>%
autoplot(difference(difference(log(GDP))))
## Warning: Removed 2 rows containing missing values (`geom_line()`).
For this plot, the appropriate lambda was 0.3
data("aus_accommodation")
tasmania_accomodations <- aus_accommodation %>%
filter(State == 'Tasmania')
tasmania_accomodations %>%
autoplot(Takings)
bc <- BoxCoxTrans(tasmania_accomodations$Takings)
(lambda <- bc$lambda)
## [1] 0.3
tasmania_accomodations %>%
mutate(Takings_transformed = (Takings^lambda-1)/lambda) %>%
autoplot(Takings_transformed)
tasmania_accomodations %>%
mutate(Takings_transformed = (Takings^lambda-1)/lambda) %>%
autoplot(difference(difference(Takings_transformed,4)))
## Warning: Removed 5 rows containing missing values (`geom_line()`).
For this plot, the appropriate lambda was -0.2
souvenirs %>% autoplot(Sales)
bc <- BoxCoxTrans(souvenirs$Sales)
(lambda <- bc$lambda)
## [1] -0.2
souvenirs %>%
mutate(Sales_transformed = (Sales^lambda-1)/lambda) %>%
autoplot(Sales_transformed)
souvenirs %>%
mutate(Sales_transformed = (Sales^lambda-1)/lambda) %>%
autoplot(difference(difference(difference(Sales_transformed,12))))
## Warning: Removed 14 rows containing missing values (`geom_line()`).
For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
For this data, I used a seasonal difference of 12 and then a secondar difference of 1
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
bc <- BoxCoxTrans(myseries$Turnover)
lambda <- bc$lambda
myseries %>%
mutate(Turnover_trans = (Turnover^lambda-1)/lambda) %>%
autoplot(Turnover_trans)
myseries %>%
mutate(Turnover_trans = (Turnover^lambda-1)/lambda) %>%
autoplot(difference(difference(Turnover_trans,12)))
## Warning: Removed 13 rows containing missing values (`geom_line()`).
y <- numeric(100)
e <- rnorm(100)
phi_1 <- 0.6
for(i in 2:100)
y[i] <- phi_1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
y <- numeric(100)
e <- rnorm(100)
phi_2 <- 0.3
for(i in 2:100)
y[i] <- phi_2*y[i-1] + e[i]
sim2 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
phi_3 <- 0.9
for(i in 2:100)
y[i] <- phi_3*y[i-1] + e[i]
sim3 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
sim2 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
sim3 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
y <- numeric(100)
e <- rnorm(100,1)
theta_1 = 0.6
for(i in 2:100)
y[i] <- y[i-1] + theta_1*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
y <- numeric(100)
e <- rnorm(100,1)
phi_1 <- 0.6
theta_1 <- 0.6
for(i in 2:100)
y[i] <- phi_1*y[i-1] + theta_1*e[i-1] + e[i]
sim1 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100,1)
phi_1 <- -0.8
phi_2 <- 0.3
for(i in 2:100)
y[i] <- phi_1*y[i-1] + phi_2*y[i-1] + e[i]
sim2 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim1 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
sim2 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
train <- aus_airpassengers %>%
filter(Year <= 2011)
test <- aus_airpassengers %>%
filter(Year > 2011)
train %>%
gg_tsdisplay(difference(log(Passengers)), plot_type='partial')
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
fit <- train %>%
model(arima110 = ARIMA(log(Passengers) ~ pdq(1,1,0)),
stepwise = ARIMA(log(Passengers)),
search = ARIMA(Passengers, stepwise=FALSE))
glance(fit)
## # A tibble: 3 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima110 0.00576 48.6 -91.2 -90.5 -86.0 <cpl [1]> <cpl [0]>
## 2 stepwise 0.00562 48.5 -93.1 -92.8 -89.7 <cpl [0]> <cpl [0]>
## 3 search 4.67 -87.8 180. 180. 183. <cpl [0]> <cpl [1]>
fit_new <- train %>%
model(arima021 = ARIMA(log(Passengers) ~ pdq(0,2,1)))
fit_new %>%
forecast(h=10) %>%
autoplot(train)
The best model was the model selected by the function, which was an ARIMA(0,2,1) model
\(y_{t+1} = (1-B)^2*log(Passengers)\)
The forecast appears pretty much the same as the model selected in part a
fit <- train %>%
model(ARIMA(log(Passengers) ~ pdq(0,1,0)))
fit %>% forecast(h=10) %>%
autoplot(train)
fit1 <- train %>%
model(ARIMA(log(Passengers) ~ 1 + pdq(2,1,2)))
fit2 <- train %>%
model(ARIMA(log(Passengers) ~ 0 + pdq(2,1,0)))
fit1 %>% forecast(h=10) %>%
autoplot(train)
fit2 %>% forecast(h=10) %>%
autoplot(train)
When I use a constant in the model, I receive a warning
fit <- train %>%
model(ARIMA(log(Passengers) ~ pdq(0,2,1)))
fc <- fit %>% forecast(h=10)
fit %>%
forecast(h=10) %>%
autoplot(train)
For the United States GDP series (from global_economy):
us_economy <- global_economy %>%
filter(Country == 'United States') %>%
mutate(GDP = GDP/1e6)
us_economy %>% autoplot(GDP)
bc <- BoxCoxTrans(us_economy$GDP)
lambda <- bc$lambda
us_economy <- us_economy %>%
mutate(GDP_trans = (GDP^lambda-1)/lambda)
us_economy %>%
gg_tsdisplay(difference(difference(GDP_trans)), plot_type='partial')
## Warning: Removed 2 rows containing missing values (`geom_line()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
model_fit <- us_economy %>%
model(arima220 = ARIMA(GDP_trans ~ 0 + pdq(2,2,0)),
arima021 = ARIMA(GDP_trans ~ 0 + pdq(0,2,1)),
arima120 = ARIMA(GDP_trans ~ 0 + pdq(1,2,0)),
step = ARIMA(GDP_trans),
search = ARIMA(GDP_trans, stepwise = FALSE))
glance(model_fit) %>% arrange(AICc)
## # A tibble: 5 × 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States step 0.180 -31.1 68.2 68.7 74.3 <cpl [1]> <cpl [0]>
## 2 United States search 0.180 -31.1 68.2 68.7 74.3 <cpl [1]> <cpl [0]>
## 3 United States arima021 0.191 -32.8 69.6 69.9 73.7 <cpl [0]> <cpl [1]>
## 4 United States arima220 0.206 -34.2 74.5 75.0 80.6 <cpl [2]> <cpl [0]>
## 5 United States arima120 0.216 -36.1 76.2 76.4 80.2 <cpl [1]> <cpl [0]>
model_fit
## # A mable: 1 x 6
## # Key: Country [1]
## Country arima220 arima021 arima120 step
## <fct> <model> <model> <model> <model>
## 1 United S… <ARIMA(2,2,0)> <ARIMA(0,2,1)> <ARIMA(1,2,0)> <ARIMA(1,1,0) w/ drift>
## # ℹ 1 more variable: search <model>
SEE ABOVE
fit <- us_economy %>%
model(ARIMA(GDP_trans ~ pdq(1,1,0) ))
fit %>% gg_tsresiduals()
Yes, the forecast appears reasonable.
fit %>% forecast(h=10) %>%
autoplot(us_economy)
fit <- us_economy %>%
model(ETS(GDP_trans))
fit %>% forecast(h=10) %>%
autoplot(us_economy)
report(fit)
## Series: GDP_trans
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.3590179
##
## Initial states:
## l[0] b[0]
## 64.3256 0.8176971
##
## sigma^2: 0
##
## AIC AICc BIC
## 142.0720 143.2259 152.3742