Figure 8.31 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers. a. Explain the differences among these figures. Do they all indicate that the data are white noise?
A classic example of a non-stationary series is the daily closing IBM stock price series (data set ibmclose). Use R to plot the daily closing prices for IBM stock and the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
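# A minimal sketch for this exercise; ibmclose comes with the fma package,
# which fpp2 loads. ggtsdisplay shows the series, its ACF and its PACF:
library(fpp2)
ggtsdisplay(ibmclose)
# The series wanders without a fixed level, the ACF decays very slowly, and
# the PACF has a single spike near 1 at lag 1 - all signs of non-stationarity,
# so the series should be differenced.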
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data. a. usnetelec b. usgdp c. mcopper d. enplanements e. visitors
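# a.
# A sketch of the assumed calls behind the output below; kpss.test comes from
# the tseries package, and lag = 1 in Box.test is inferred from df = 1.
library(tseries)
autoplot(usnetelec) # roughly linear upward trend; annual data, so no seasonality
Box.test(diff(usnetelec), lag = 1, type = "Ljung-Box")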
##
## Box-Ljung test
##
## data: diff(usnetelec)
## X-squared = 0.8508, df = 1, p-value = 0.3563
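kpss.test(diff(usnetelec)) # assumed call matching the output below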
## Warning in kpss.test(diff(usnetelec)): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(usnetelec)
## KPSS Level = 0.15848, Truncation lag parameter = 3, p-value = 0.1
# kpss test result shows that first differencing made the data stationary
# b.
autoplot(usgdp) # linearly increasing series
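# Assumed call behind the Box-Ljung output below:
Box.test(diff(usgdp), lag = 1, type = "Ljung-Box")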
##
## Box-Ljung test
##
## data: diff(usgdp)
## X-squared = 39.187, df = 1, p-value = 3.85e-10
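ndiffs(usgdp) # assumed call: number of first differences suggested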
## [1] 2
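Box.test(diff(diff(usgdp)), lag = 1, type = "Ljung-Box") # assumed call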
##
## Box-Ljung test
##
## data: diff(diff(usgdp))
## X-squared = 53.294, df = 1, p-value = 2.872e-13
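# Assumed call matching the output below. Note that it tests usnetelec, which
# looks like a copy-paste slip: the series in this part is usgdp.
kpss.test(diff(diff(usnetelec)))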
## Warning in kpss.test(diff(diff(usnetelec))): p-value greater than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(usnetelec))
## KPSS Level = 0.098532, Truncation lag parameter = 3, p-value = 0.1
# kpss test result shows that differencing twice made the data stationary
# c.
autoplot(mcopper) # increasing trend, with variation that grows with the price level
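# Assumed calls behind the output below; lambda_mcopper is the Box-Cox
# parameter referenced in the test output:
lambda_mcopper <- BoxCox.lambda(mcopper)
Box.test(diff(BoxCox(mcopper, lambda_mcopper)), lag = 1, type = "Ljung-Box")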
##
## Box-Ljung test
##
## data: diff(BoxCox(mcopper, lambda_mcopper))
## X-squared = 57.517, df = 1, p-value = 3.353e-14
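kpss.test(diff(BoxCox(mcopper, lambda_mcopper))) # assumed call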
## Warning in kpss.test(diff(BoxCox(mcopper, lambda_mcopper))): p-value
## greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(BoxCox(mcopper, lambda_mcopper))
## KPSS Level = 0.057275, Truncation lag parameter = 6, p-value = 0.1
# kpss test result shows that differencing the Box-Cox transformed data made them stationary.
# d.
autoplot(enplanements) # the series has seasonality and an increasing trend
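# Assumed calls for the two "[1] 1" results below: the number of seasonal
# differences for the transformed series, then the number of first differences
# after seasonal differencing.
lambda_enplanements <- BoxCox.lambda(enplanements)
nsdiffs(BoxCox(enplanements, lambda_enplanements))
ndiffs(diff(BoxCox(enplanements, lambda_enplanements), lag = 12))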
## [1] 1
## [1] 1
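Box.test(diff(diff(BoxCox(enplanements, lambda_enplanements), lag = 12)),
         lag = 1, type = "Ljung-Box") # assumed call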
##
## Box-Ljung test
##
## data: diff(diff(BoxCox(enplanements, lambda_enplanements), lag = 12))
## X-squared = 29.562, df = 1, p-value = 5.417e-08
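kpss.test(diff(diff(BoxCox(enplanements, lambda_enplanements), lag = 12))) # assumed call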
## Warning in kpss.test(diff(diff(BoxCox(enplanements, lambda_enplanements), :
## p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(BoxCox(enplanements, lambda_enplanements), lag = 12))
## KPSS Level = 0.042424, Truncation lag parameter = 5, p-value = 0.1
# kpss test result shows that seasonal and first differencing of the Box-Cox transformed data made them stationary
# e.
autoplot(visitors) # the series has seasonality and an increasing trend
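# Assumed calls for the two "[1] 1" results below, as in part d:
lambda_visitors <- BoxCox.lambda(visitors)
nsdiffs(BoxCox(visitors, lambda_visitors))
ndiffs(diff(BoxCox(visitors, lambda_visitors), lag = 12))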
## [1] 1
## [1] 1
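Box.test(diff(diff(BoxCox(visitors, lambda_visitors), lag = 12)),
         lag = 1, type = "Ljung-Box") # assumed call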
##
## Box-Ljung test
##
## data: diff(diff(BoxCox(visitors, lambda_visitors), lag = 12))
## X-squared = 21.804, df = 1, p-value = 3.02e-06
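kpss.test(diff(diff(BoxCox(visitors, lambda_visitors), lag = 12))) # assumed call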
## Warning in kpss.test(diff(diff(BoxCox(visitors, lambda_visitors), lag =
## 12))): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(BoxCox(visitors, lambda_visitors), lag = 12))
## KPSS Level = 0.015833, Truncation lag parameter = 4, p-value = 0.1
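# kpss test result shows that seasonal and first differencing of the Box-Cox
# transformed data made them stationary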
For your retail data (from Exercise 3 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
library(fpp2)
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
retail_ts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
autoplot(retail_ts) # increasing trend and strong seasonality; the variation grows with the level of the series
# The series needs a Box-Cox transformation, followed by seasonal and first differencing.
lambda_retail_ts <- BoxCox.lambda(retail_ts)
ndiffs(retail_ts)
## [1] 1
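nsdiffs(retail_ts) # assumed call for the next result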
## [1] 1
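Box.test(diff(diff(BoxCox(retail_ts, lambda_retail_ts), lag = 12)),
         lag = 1, type = "Ljung-Box") # assumed call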
##
## Box-Ljung test
##
## data: diff(diff(BoxCox(retail_ts, lambda_retail_ts), lag = 12))
## X-squared = 13.573, df = 1, p-value = 0.0002294
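kpss.test(diff(diff(BoxCox(retail_ts, lambda_retail_ts), lag = 12))) # assumed call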
## Warning in kpss.test(diff(diff(BoxCox(retail_ts, lambda_retail_ts), lag =
## 12))): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(BoxCox(retail_ts, lambda_retail_ts), lag = 12))
## KPSS Level = 0.013817, Truncation lag parameter = 5, p-value = 0.1
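# kpss test result shows that one seasonal and one first difference of the
# Box-Cox transformed retail data yield a stationary series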
Use R to simulate and plot some data from simple ARIMA models. a. Use the following R code to generate data from an AR(1) model with \(\phi_1\) = 0.6 and \(\sigma^2\) = 1. The process starts with \(y_1\) = 0
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
library(fpp2)
# a.
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
# b.
ar1_mod <- function(phi1){
  # simulate 100 observations from an AR(1) model with parameter phi1,
  # sigma^2 = 1 and y[1] = 0
  y <- ts(numeric(100))
  e <- rnorm(100)
  for(i in 2:100){
    y[i] <- phi1*y[i-1] + e[i]
  }
  return(y)
}
autoplot(ar1_mod(0.3), series = "0.3") + geom_line(size = 1, colour = "red") +
  autolayer(y, series = "0.6", size = 1) +
  autolayer(ar1_mod(0.9), size = 1, series = "0.9") +
  ylab("AR(1) models") +
  guides(colour = guide_legend(title = "Phi1"))
# The variation of y increases as phi1 increases
# c.
ma1_mod <- function(theta1){
  # simulate 100 observations from an MA(1) model with parameter theta1 and
  # sigma^2 = 1 (y[1] is left at 0, as in the AR(1) setup)
  y <- ts(numeric(100))
  e <- rnorm(100)
  for(i in 2:100){
    y[i] <- theta1*e[i-1] + e[i]
  }
  return(y)
}
# d.
autoplot(ma1_mod(0.3), series = "0.3") + geom_line(size = 1, colour = "red") +
  autolayer(ma1_mod(0.6), series = "0.6", size = 1) +
  autolayer(ma1_mod(0.9), size = 1, series = "0.9") +
  ylab("MA(1) models") +
  guides(colour = guide_legend(title = "Theta1"))
# The variation of y increases as theta1 increases
# e.
y_arima.1.1 <- ts(numeric(50))
e <- rnorm(50)
for(i in 2:50){
  y_arima.1.1[i] <- 0.6*y_arima.1.1[i-1] + 0.6*e[i-1] + e[i]
}
# f.
y_arima.2 <- ts(numeric(50))
e <- rnorm(50)
for(i in 3:50){
  y_arima.2[i] <- -0.8*y_arima.2[i-1] + 0.3*y_arima.2[i-2] + e[i]
}
# g.
autoplot(y_arima.1.1, series = "ARMA(1,1)") +
  autolayer(y_arima.2, series = "AR(2)") +
  ylab("y") +
  guides(colour = guide_legend(title = "Models"))
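# The AR(2) series is non-stationary: phi2 - phi1 = 0.3 - (-0.8) = 1.1 > 1
# violates the AR(2) stationarity conditions, so it oscillates with
# ever-increasing amplitude, while the ARMA(1,1) series fluctuates around a
# stable level.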
Consider wmurders, the number of women murdered each year (per 100,000 standard population) in the United States. a. By studying appropriate graphs of the series in R, find an appropriate ARIMA(\(p,d,q\)) model for these data. b. Should you include a constant in the model? Explain. c. Write this model in terms of the backshift operator. d. Fit the model using R and examine the residuals. Is the model satisfactory? e. Forecast three times ahead. Check your forecasts by hand to make sure that you know how they have been calculated. f. Create a plot of the series with forecasts and prediction intervals for the next three periods shown. g. Does auto.arima() give the same model you have chosen? If not, which model do you think is better?
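# a.
# A sketch of the assumed calls behind the output below; kpss.test is from
# the tseries package.
library(tseries)
autoplot(wmurders) # annual data: a long rise then fall, no seasonality
ndiffs(wmurders)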
## [1] 2
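kpss.test(diff(wmurders, differences = 2)) # assumed call matching the output below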
## Warning in kpss.test(diff(wmurders, differences = 2)): p-value greater than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(wmurders, differences = 2)
## KPSS Level = 0.045793, Truncation lag parameter = 3, p-value = 0.1
# kpss test result shows that differencing twice made the data stationary.
diff(wmurders, differences = 2) %>% ggtsdisplay()
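# The ACF/PACF of the twice-differenced series suggest a couple of significant
# low-order MA lags, pointing to an ARIMA(0,2,2) model.
# b. No constant is included: with d = 2, a constant would imply a quadratic
# trend in the original series, which is implausible for these data.
# c. In backshift notation: (1 - B)^2*yt = (1 + theta1*B + theta2*B^2)*et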
library(fpp2)
# d.
wmurders_arima.0.2.2 <- Arima(wmurders, order = c(0, 2, 2))
checkresiduals(wmurders_arima.0.2.2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)
## Q* = 11.764, df = 8, p-value = 0.1621
##
## Model df: 2. Total lags used: 10
# Satisfactory model; the residuals resemble a white noise series
# e.
fc_wmurders_arima.0.2.2 <- forecast(wmurders_arima.0.2.2, h = 3)
fc_wmurders_arima.0.2.2$mean
## Time Series:
## Start = 2005
## End = 2007
## Frequency = 1
## [1] 2.480525 2.374890 2.269256
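# Coefficients for the hand calculation below come from printing the fitted model:
wmurders_arima.0.2.2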
## Series: wmurders
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -1.0181 0.1470
## s.e. 0.1220 0.1156
##
## sigma^2 estimated as 0.04702: log likelihood=6.03
## AIC=-6.06 AICc=-5.57 BIC=-0.15
# Manual calculation
# Formula
# (1 - B)^2*yt = (1 - 1.0181*B + 0.1470*B^2)*et
# yt = 2yt-1 - yt-2 + et - 1.0181*et-1 + 0.1470*et-2
years <- length(wmurders)
e <- fc_wmurders_arima.0.2.2$residuals
fc1 <- 2*wmurders[years] - wmurders[years - 1] - 1.0181*e[years] + 0.1470*e[years - 1]
fc2 <- 2*fc1 - wmurders[years] + 0.1470*e[years]
fc3 <- 2*fc2 - fc1
c(fc1, fc2, fc3)
## [1] 2.480523 2.374887 2.269252
# The manual forecasts match the forecast() output up to rounding of the coefficients
# f.
autoplot(fc_wmurders_arima.0.2.2)
# g.
fc_wmurders_autoarima <- forecast(auto.arima(wmurders), h = 3)
accuracy(fc_wmurders_arima.0.2.2)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0113461 0.2088162 0.1525773 -0.2403396 4.331729 0.9382785
## ACF1
## Training set -0.05094066
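accuracy(fc_wmurders_autoarima) # auto.arima() chose ARIMA(1,2,1)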
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01065956 0.2072523 0.1528734 -0.2149476 4.335214 0.9400996
## ACF1
## Training set 0.02176343
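# Residual checks for both models (assumed calls matching the output below):
checkresiduals(fc_wmurders_arima.0.2.2)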
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)
## Q* = 11.764, df = 8, p-value = 0.1621
##
## Model df: 2. Total lags used: 10
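checkresiduals(fc_wmurders_autoarima)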
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 12.419, df = 8, p-value = 0.1335
##
## Model df: 2. Total lags used: 10
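# g. auto.arima() selects ARIMA(1,2,1) rather than the ARIMA(0,2,2) chosen
# above. The two models have almost identical training accuracy and both pass
# the Ljung-Box test, so there is little to choose between them.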