DATA 624 HW6

suppressPackageStartupMessages(library(fpp2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(fpp3))
suppressPackageStartupMessages(library(urca))
suppressPackageStartupMessages(library(forecast))

Home Work #6

Question 9.1

Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

Explain the differences among these figures. Do they all indicate that the data are white noise?

Looks like these figures are all of white noise - 95% of the spikes within bounds. As the sample size increases we see the (autocorrelation) dashed lines around the mean of zero narrow.

Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

The critical values are at different distances from the mean of zero because of the autocorrelation formula. The critical values are based on the sample size, each diagram had a different sample size.

Question 9.2

A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

ggtsdisplay(ibmclose)

The Time series shows periods of rise and fall. ACF shows the slow decrease due to the trend. I dont notice “scalloped” shape - no seasonality. PACF shows the 1st lag is nearly one, and all other PACF are close to zero, so other lags are autocorrelation. Thus, altogether confirming that it is non-stationary and should be differenced to make into a stationary time series.

Question 9.3

For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

Turkish GDP from global_economy.

TurkishGDP <- global_economy %>% filter(Country == "Turkey")
ggtsdisplay(TurkishGDP$GDP)

Global_BC <- BoxCox(TurkishGDP$GDP, lambda = BoxCox.lambda(TurkishGDP$GDP))
ndiffs(Global_BC)

## [1] 1

Global1 <- diff(Global_BC)
ggtsdisplay(Global1)

Accommodation takings in the state of Tasmania from aus_accommodation.

Tasmania1 <- aus_accommodation %>% filter(State == "Tasmania")
ggtsdisplay(Tasmania1$Takings)

Tasmania_BC <- BoxCox(Tasmania1$Takings, lambda = BoxCox.lambda(Tasmania1$Takings))
ndiffs(Tasmania_BC)

## [1] 1

Tasmaniadiff <- diff(Tasmania_BC)
ggtsdisplay(Tasmaniadiff)

Monthly sales from souvenirs.

Souvenirs1 <- souvenirs
ggtsdisplay(Souvenirs1$Sales)

Souvenirs_BC <- BoxCox(Souvenirs1$Sales, lambda = BoxCox.lambda(Souvenirs1$Sales))
ndiffs(Souvenirs_BC)

## [1] 1

Souvenirsdiff <- diff(Souvenirs_BC)
ggtsdisplay(Souvenirsdiff)

Question 9.5

For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

set.seed(718)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
    gg_tsdisplay(Turnover, plot_type = 'partial')

There is an obvious trend and there seems to be some seasonality as well - This cannot be the case for a stationary series.

ndiffs(myseries$Turnover)

## [1] 1

myseries$Turnover %>% 
    diff(differences = 2) %>% 
    ur.kpss() %>% 
    summary()

## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 5 lags. 
## 
## Value of test-statistic is: 0.0324 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

differenced to reach stationarity

Question 9.6

Simulate and plot some data from simple ARIMA models.

#A
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)

#B
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
p1 <- autoplot(y) +ggtitle("phi=0.6")

for(i in 2:100)
  y[i] <- 0.1*y[i-1] + e[i]
p2<- autoplot(y)+ggtitle(" phi=0.1")

for(i in 2:100)
  y[i] <- 1.0*y[i-1] + e[i]
p3<- autoplot(y)+ggtitle("phi= 1.0")

p1

p2

p3

#CD
ma.1 <- function(theta, sd=1, n=100){
  y <- ts(numeric(n))
  e <- rnorm(n, sd=sd)
  e[1] <- 0
  for(i in 2:n)
    y[i] <- theta*e[i-1] + e[i]
  return(y)
}

data.list <- list()
i <- 0
theta.vec <- c(0.0000001, 0.0001, 0.1) * 10
for (theta in theta.vec){
  i <- i + 1
  data.list[[i]] <- ma.1(theta)
}
gen.data <- do.call(cbind, data.list)
colnames(gen.data) <- paste('theta=', theta.vec, sep = '')
autoplot(gen.data) + ylab('Data')

y1 <- ts(numeric(100))
e <- rnorm(100, sd=1)
for(i in 2:100)
  y1[i] <- 0.6*y1[i-1] + 0.6*e[i-1] + e[i]
autoplot(y1) +
  ggtitle('ARMA(1,1) Generated Data')

y2 <- ts(numeric(100))
e <- rnorm(100, sd=1)
for(i in 3:100)
  y2[i] <- -0.8*y2[i-1] + 0.3*y2[i-2] + e[i]
autoplot(y2) +
  ggtitle('AR(2) Generated Data')

par(mfrow=c(1,2))
acf(y1, main='ARMA(1,1) Generated Data')
acf(y2, main='AR(2) Generated Data')

Question 9.7

Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods. Write the model in terms of the backshift operator. Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a. Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens. Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?

fit <- aus_airpassengers %>%
  model(ARIMA(Passengers))

report(fit)

## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

fit %>% forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(y = "Passengers (Millions)", title = "Australian Air Travel")

fit2 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ pdq(0,1,0)))

fit2 %>% forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(y = "Millions of Passengers", title = "Australian Air Travel")

fit3 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ pdq(2,1,2)))

## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.

## Warning: 1 error encountered for ARIMA(Passengers ~ pdq(2, 1, 2))
## [1] Could not find an appropriate ARIMA model.
## This is likely because automatic selection does not select models with characteristic roots that may be numerically unstable.
## For more details, refer to https://otexts.com/fpp3/arima-r.html#plotting-the-characteristic-roots

report(fit3)

## Series: Passengers 
## Model: NULL model 
## NULL model

fit3 %>% forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(y = "Millions of Passengers", title = "Australian Air Travel")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 10 row(s) containing missing values (geom_path).

Question 9.8

For the United States GDP series (from global_economy):

if necessary, find a suitable Box-Cox transformation for the data; fit a suitable ARIMA model to the transformed data using ARIMA(); try some other plausible models by experimenting with the orders chosen; choose what you think is the best model and check the residual diagnostics; produce forecasts of your fitted model. Do the forecasts look reasonable? compare the results with what you would obtain using ETS() (with no transformation).

data(global_economy)
plot(global_economy)

lam <- BoxCox.lambda(global_economy$GDP)
global.bcx <- BoxCox(global_economy$GDP, lambda = lam)
tsdisplay(global.bcx)

fit <- auto.arima(global_economy$GDP, trace = TRUE, ic ="aic", lambda = lam)

## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2) with non-zero mean : Inf
##  ARIMA(0,0,0) with non-zero mean : 87508.09
##  ARIMA(1,0,0) with non-zero mean : 45033.15
##  ARIMA(0,0,1) with non-zero mean : 36186.37
##  ARIMA(0,0,0) with zero mean     : 124043.8
##  ARIMA(1,0,1) with non-zero mean : 7587.691
##  ARIMA(2,0,1) with non-zero mean : Inf
##  ARIMA(1,0,2) with non-zero mean : Inf
##  ARIMA(0,0,2) with non-zero mean : 29097
##  ARIMA(2,0,0) with non-zero mean : 45122.56
##  ARIMA(1,0,1) with zero mean     : Inf
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,1) with non-zero mean : 46053.52
## 
##  Best model: ARIMA(1,0,1) with non-zero mean

fit

## Series: global_economy$GDP 
## ARIMA(1,0,1) with non-zero mean 
## Box Cox transformation: lambda= 0.04769931 
## 
## Coefficients:
##          ar1     ma1     mean
##       0.9828  0.0621  44.7575
## s.e.  0.0015  0.0089   0.8371
## 
## sigma^2 estimated as 2.781:  log likelihood=-23022.76
## AIC=46053.52   AICc=46053.53   BIC=46084.03

fit2 <- Arima(global_economy$GDP, order = c(0,1,2), lambda = lam)
fit3 <- Arima(global_economy$GDP, order = c(1,1,2), lambda = lam)

accuracy(fit)

##                        ME          RMSE          MAE       MPE     MAPE
## Training set 103909788567 1051556889908 165545654172 -3023.578 3039.788
##                  MASE      ACF1
## Training set 1.542086 0.2316001

accuracy(fit2)

##                       ME         RMSE          MAE       MPE     MAPE      MASE
## Training set 19358979257 958061637739 103255710294 -3301.808 3318.423 0.9618445
##                      ACF1
## Training set -0.005642225

accuracy(fit3)

##                       ME         RMSE          MAE       MPE     MAPE      MASE
## Training set 14526564969 953953513593 100406947939 -3297.973 3314.469 0.9353078
##                     ACF1
## Training set -0.01186029

plot(fit3$residuals)

fcst <- forecast(fit, h = 10)
plot(fcst)

fit4 <- ets(global_economy$GDP); fit4

## Warning in ets(global_economy$GDP): Missing values encountered. Using longest
## contiguous portion of time series

## ETS(M,N,N) 
## 
## Call:
##  ets(y = global_economy$GDP) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 44237333811.1732 
## 
##   sigma:  0.6476
## 
##      AIC     AICc      BIC 
## 24919.44 24919.50 24931.65

fcst2 <- forecast(fit4, h = 10)
plot(fcst2)