Data 624-HW6

\[9.1\]

a.The figures seem to show white noise data because the ACF plots do not show a discernable pattern of correlation between lags. The expectation is that at least 95% of autocorrelations are within the critical boundary to be considered white noise. Series X2 does have 2 nudge slightly above the critical values, which means that slightly less than 95% of the values meet the criteria, however it isn’t enough to say that the data do not resemble white noise.

b.The critical values are based on the length of the time series, they are defined as ±2T√1. So, the longer the time series, the critical values shrink.

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2

## Warning: package 'dplyr' was built under R version 4.3.2

## Warning: package 'tidyr' was built under R version 4.3.2

## Warning: package 'lubridate' was built under R version 4.3.2

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tsibble' was built under R version 4.3.3

## Warning: package 'tsibbledata' was built under R version 4.3.3

## Warning: package 'feasts' was built under R version 4.3.3

## Warning: package 'fabletools' was built under R version 4.3.3

## Warning: package 'fable' was built under R version 4.3.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.2

## Warning: package 'purrr' was built under R version 4.3.2

## Warning: package 'stringr' was built under R version 4.3.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gridExtra)

## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

\[9.2\]

gafa_stock %>%
distinct(Symbol)

amazon <- gafa_stock %>%
  filter(Symbol == "AMZN")

p1_a <- amazon %>% 
  autoplot(Close) +
  labs(title="Daily Closing Stock Price (Amazon) ")

p2_a <- amazon %>%
  ACF(Close) %>%
  autoplot() + labs(title="Correlation of Daily Closing Stock Price (Amazon) ")

## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.

p3_a <- amazon %>%
  PACF(Close) %>%
  autoplot() + labs(title="Partial Autocorrelation Daily Closing Stock Price (Amazon) ")

## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.

grid.arrange(p1_a, p2_a, p3_a, nrow = 2)

## From the plots: we clearly see a trend in the data and this is also evident in the ACF plot, we see a slow decrease as the lags increase suggesting a trend, the series is non-stationary. Additionally, the large initial spike in the PACF plot also indicates that this data is not stationary, therefore it should be differenced to make it stationary.

\[9.3\]

#a.
turk <- global_economy %>%
  filter(Country == "Turkey")

p1_t <- turk %>% 
  autoplot(GDP) +
  labs(title="Turkey GDP")

p2_t <- turk %>%
  ACF(GDP) %>%
  autoplot() + labs(title="Correlation of Turkey GDP ")

p3_t <- turk %>%
  PACF(GDP) %>%
  autoplot() + labs(title="Partial Autocorrelation of Turkey GDP ")

grid.arrange(p1_t, p2_t, p3_t, nrow = 2)

## The Turkish GDP data is non-stationary. We’ll make the appropriate Box-Cox transformations and order of differencing in order to obtain stationary data.

lambda_t <- turk %>%
  features(GDP,features=guerrero) %>%
  pull(lambda_guerrero)
lambda_t

## [1] 0.1572187

# The guerrero feature suggests a transformation with lamda 0.16.

turk %>%
  features(box_cox(GDP, lambda_t), unitroot_ndiffs)

p4_t <- turk %>% 
  autoplot(difference(box_cox(GDP, lambda_t), 1)) +
  labs(title="Turkey GDP After Transformation")

p5_t <- turk %>%
  ACF(difference(box_cox(GDP, lambda_t), 1)) %>%
  autoplot() + labs(title="Correlation of Turkey GDP After Transformation ")

p6_t <- turk %>%
  PACF(difference(box_cox(GDP, lambda_t), 1)) %>%
  autoplot() + labs(title="Partial Autocorrelation of Turkey GDP After Transformation ")

grid.arrange(p4_t, p5_t, p6_t, nrow = 2)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

We were able to make the Turkish GDP stationary after finding an appropriate Box-Cox transformation and 1st order of differencing.

#b.
tas <- aus_accommodation %>%
  filter(State == "Tasmania")

p1_ta <- tas %>% 
  autoplot(Takings) +
  labs(title="Accomodation Takings in Tasmania")

p2_ta <- tas %>%
  ACF(Takings) %>%
  autoplot() + labs(title="Correlation of Takings in Tasmania ")

p3_ta <- tas %>%
  PACF(Takings) %>%
  autoplot() + labs(title="Partial Autocorrelation of Takings in Tasmania ")

grid.arrange(p1_ta, p2_ta, p3_ta, nrow = 2)

## We may need to apply unitroot_nsdiffs() to the quarterly Tasmanian takings data in order to determine if we need any seasonal differencing.

lambda_ta <- tas %>%
  features(Takings,features=guerrero) %>%
  pull(lambda_guerrero)
lambda_ta

## [1] 0.001819643

tas %>%
  features(box_cox(Takings, lambda_ta), unitroot_nsdiffs)

tas %>%
  features(difference(box_cox(Takings, lambda_ta), 4), unitroot_ndiffs)

p4_ta <- tas %>%
  autoplot(difference(box_cox(Takings, lambda_ta), 4)) +
  labs(title="Accomodation Takings in Tasmania After Transformation")

p5_ta <- tas %>%
  ACF(difference(box_cox(Takings, lambda_ta), 4)) %>%
  autoplot() + labs(title="Correlation of Takings After Tranformation ")

p6_ta <- tas %>%
  PACF(difference(box_cox(Takings, lambda_ta), 4)) %>%
  autoplot() + labs(title="Partial Autocorrelation of Takings After Transformation ")

grid.arrange(p4_ta, p5_ta, p6_ta, nrow = 2)

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

After applying the necessary transformations we can see on the ACF plot that about 3 autocorrelations are outside the 95% limit and 2 in the PACF plots. This is indicative that the transformations have made the data stationary.

#c.
p1_s <- souvenirs %>%
  autoplot(Sales) +
  labs(title="Monthly Sales")

p2_s <- souvenirs %>%
  ACF(Sales) %>%
  autoplot() + labs(title="Correlation of Monthly Sales ")

p3_s <- souvenirs %>%
  PACF(Sales) %>%
  autoplot() + labs(title="Partial Autocorrelation of Monthly Sales ")

grid.arrange(p1_s, p2_s, p3_s, nrow = 2)

## We may need to apply unitroot_nsdiffs() to the monthly sales data in order to determine if we need any seasonal differencing.

lambda_s <- souvenirs %>%
  features(Sales,features=guerrero) %>%
  pull(lambda_guerrero)
lambda_s

## [1] 0.002118221

souvenirs %>%
  features(box_cox(Sales, lambda_s), unitroot_nsdiffs)

souvenirs %>%
  features(difference(box_cox(Sales, lambda_s), 12), unitroot_ndiffs)

p4_s <- souvenirs %>%
  autoplot(difference(box_cox(Sales, lambda_s), 12)) +
  labs(title="Monthly Sales After Transformation")

p5_s <- souvenirs %>%
  ACF(difference(box_cox(Sales, lambda_s), 12)) %>%
  autoplot() + labs(title="Correlation of Monthly Sales After Tranformation ")

p6_s <- souvenirs %>%
  PACF(difference(box_cox(Sales, lambda_s), 12)) %>%
  autoplot() + labs(title="Partial Autocorrelation of Monthly Sales After Transformation ")

grid.arrange(p4_s, p5_s, p6_s, nrow = 2)

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

After applying the necessary transformations we can see on the ACF and PACF plots that only two autocorrelations are outside the 95% limits, which suggests we’ve made the data stationary.

\[9.5\]

set.seed(420)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
p1_m <- myseries %>%
  autoplot(Turnover) +
  labs(title="Monthly Turnover")

p2_m <- myseries %>%
  ACF(Turnover) %>%
  autoplot() + labs(title="Correlation of Monthly Turnover ")

p3_m <- myseries %>%
  PACF(Turnover) %>%
  autoplot() + labs(title="Partial Autocorrelation of Monthly Turnover ")

grid.arrange(p1_m, p2_m, p3_m, nrow = 2)

lambda_m <- myseries %>%
  features(Turnover,features=guerrero) %>%
  pull(lambda_guerrero)
lambda_m

## [1] 0.1409206

myseries %>%
  features(box_cox(Turnover, lambda_m), unitroot_nsdiffs)

myseries %>%
  features(difference(box_cox(Turnover, lambda_m), 12), unitroot_ndiffs)

p4_m <- myseries %>%
  autoplot(difference(box_cox(Turnover, lambda_m), 12)) +
  labs(title="Monthly Turnover After Transformation")

p5_m <- myseries %>%
  ACF(difference(box_cox(Turnover, lambda_m), 12)) %>%
  autoplot() + labs(title="Correlation of Monthly Turnover After Tranformation ")

p6_m <- myseries %>%
  PACF(difference(box_cox(Turnover, lambda_m), 12)) %>%
  autoplot() + labs(title="Partial Autocorrelation of Monthly Turnover After Transformation ")

grid.arrange(p4_m, p5_m, p6_m, nrow = 2)

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## It does not seem that we succeeded on making the data stationary, even after making the necessary changes.

\[9.6\]

#a.

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)

#b.

sim %>% autoplot(y)

set.seed(123)
y2 <- numeric(100)
y3 <- numeric(100)
e2 <- rnorm(100)
e3 <- rnorm(100)

for(i in 2:100){
    y2[i] <- 0.1*y2[i-1] + e2[i]
    y3[i] <- 0.9*y3[i-1] + e3[i]
    
}

sim <- tsibble(idx = seq_len(100), y = y, y2 = y2, y3 = y3, index = idx)

plot1 <- sim %>% autoplot(y2) + labs( title = "Phi = 0.1")
plot2 <- sim %>% autoplot(y) + labs( title = "Phi = 0.6")
plot3 <- sim %>% autoplot(y3) + labs( title = "Phi = 0.9")

grid.arrange(plot1, plot2, plot3, nrow = 2)

## as the ϕ1 decreases or approaches 0, yt starts to be equivalent to white noise

#c.
set.seed(123)
y4 <- numeric(100)
e4 <- rnorm(100)

for(i in 2:100)
  y4[i] <- 0.6*e4[i-1] + e4[i]
  
sim2 <- tsibble(idx = seq_len(100), y4 = y4, index = idx)

#d.
set.seed(123)
set.seed(123)
y5 <- numeric(100)
y6 <- numeric(100)
e5 <- rnorm(100)
e6 <- rnorm(100)

for(i in 2:100){
  y5[i] <- 0.1*e5[i-1] + e5[i]
  y6[i] <- 0.9*e6[i-1] + e6[i]
}
sim2 <- tsibble(idx = seq_len(100), y4 = y4, y5 = y5, y6 = y6, index = idx)

plot4 <- sim2 %>% autoplot(y5) + labs( title = "Theta = 0.1")
plot5 <- sim2 %>% autoplot(y4) + labs( title = "Theta = 0.6")
plot6 <- sim2 %>% autoplot(y6) + labs( title = "Theta = 0.9")

grid.arrange(plot4, plot5, plot6, nrow = 2)

## Most recent observations have higher weight than observations from the more distant past. Additionally, only the scale in the series changes, not the patterns.

#e.
set.seed(123)
y7 <- (numeric(100))
e7 <- rnorm(100)

for(i in 2:100)
  y7[i] <- 0.6*y7[i-1] + 0.6*e7[i-1] + e7[i] 

sim3 <- tsibble(idx = seq_len(100), y7 = y7, index = idx)

#f.
set.seed(123)
y8 <- (numeric(100))
e8 <- rnorm(100)

for(i in 3:100)
  y8[i] <- -0.8*y8[i-1] + 0.3*y8[i-2] + e8[i]

sim4 <- tsibble(idx = seq_len(100), y8 = y8, index = idx)

#g.
plot7 <- sim3 %>% autoplot(y7) + labs( title = "ARMA(1,1) Phi = 0.6 and Theta = 0.6")
plot8 <- sim4 %>% autoplot(y8) + labs( title = "AR(2) Phi1 = -0.8 and Phi2 = 0.3")

grid.arrange(plot7, plot8, nrow = 2)

## The ARMA(1,1) model seems to be approaching stationarity. Perhaps by decreasing Phi we could achieve this. The AR(2) model has negative coefficient of -.8 which will cause the first term to alternate between a positive and negative value. We can also observe that the AR(2) model shows larger values as time progresses due to the Phi2 term. \[9.7\]

autoplot(aus_airpassengers)

## Plot variable not specified, automatically selected `.vars = Passengers`

#a.
fit <- aus_airpassengers %>%
  model(ARIMA(Passengers))

report(fit)

## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

fit %>% gg_tsresiduals()

fit %>% forecast(h=10) %>%
  autoplot(aus_airpassengers)

## b. ARIMA(0,2,1): ((1−B)^2)Yt=(1+(Θ1)B)ϵt

#c.
fit2 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 1 + pdq(0,1,0)))

report(fit2)

## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.4191
## s.e.    0.3014
## 
## sigma^2 estimated as 4.271:  log likelihood=-98.16
## AIC=200.31   AICc=200.59   BIC=203.97

fit2 %>% forecast(h=10) %>%
  autoplot(aus_airpassengers)

## The forecasts look very similar, upward trending

#d.
fit3 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 1 + pdq(2,1,2)))

report(fit3)

## Series: Passengers 
## Model: ARIMA(2,1,2) w/ drift 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2  constant
##       -0.5518  -0.7327  0.5895  1.0000    3.2216
## s.e.   0.1684   0.1224  0.0916  0.1008    0.7224
## 
## sigma^2 estimated as 4.031:  log likelihood=-96.23
## AIC=204.46   AICc=206.61   BIC=215.43

fit3 %>% forecast(h=10) %>%
  autoplot(aus_airpassengers)

fit4 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 0 + pdq(2,1,2)))

## Warning: 1 error encountered for ARIMA(Passengers ~ 0 + pdq(2, 1, 2))
## [1] non-stationary AR part from CSS

report(fit4)

## Series: Passengers 
## Model: NULL model 
## NULL model

When we remove the constant, we get an error (non-stationary AR part from CSS) and the model is NULL.

#e.

fit5 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 1 + pdq(0,2,1)))

## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.

report(fit5)

## Series: Passengers 
## Model: ARIMA(0,2,1) w/ poly 
## 
## Coefficients:
##           ma1  constant
##       -1.0000    0.0571
## s.e.   0.0585    0.0213
## 
## sigma^2 estimated as 3.855:  log likelihood=-95.1
## AIC=196.21   AICc=196.79   BIC=201.63

fit5 %>% forecast(h=10) %>%
  autoplot(aus_airpassengers)

## We can see that the line has become steeper and forecasts are higher than seen in previous graphs.

\[9.8\]

us <- global_economy %>%
  filter(Country == "United States")

autoplot(us)

## Plot variable not specified, automatically selected `.vars = GDP`

#a.
lambda_us <- us %>%
  features(GDP,features=guerrero) %>%
  pull(lambda_guerrero)
lambda_us

## [1] 0.2819443

us %>% autoplot(box_cox(GDP, lambda_us))

## transformation has helped straighten the line.

#b.
fit_us <- us %>%
  model(ARIMA(box_cox(GDP, lambda_us)))

report(fit_us)

## Series: GDP 
## Model: ARIMA(1,1,0) w/ drift 
## Transformation: box_cox(GDP, lambda_us) 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.1822
## s.e.  0.1198    9.5047
## 
## sigma^2 estimated as 5479:  log likelihood=-325.32
## AIC=656.65   AICc=657.1   BIC=662.78

#c.
fit_others <- us %>%
  model(
    "ARIMA(2,1,2)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(2,1,2)),
    "ARIMA(0,1,0)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(0,1,0)),
    "ARIMA(1,1,0)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(1,1,0)) 
    
      )
glance(fit_others) %>%
  arrange (AIC) %>%
  select(.model:BIC)

#d.
fit_us %>% gg_tsresiduals()

## The residuals are distributed evenly on the time plot. We can also observe that the residuals look like white noise on the ACF plot.

#e.
fit_us %>% 
  forecast(h=5) %>%
  autoplot(us)

The forecasts look reasonable as they are following the trend contained within the data

#f.

fit_ets <- us %>%
  model(ETS(GDP))

fit_ets %>%
  forecast(h=5) %>%
  autoplot(us)

## We can observe that the prediction intervals for the ETS() model are a lot wider than for the ARIMA() model. And if we look at the model report, we can also realize that the AIC is a lot larger for the ETS() model.

report(fit_ets)

## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9990876 
##     beta  = 0.5011949 
## 
##   Initial states:
##          l[0]        b[0]
##  448093333334 64917355687
## 
##   sigma^2:  7e-04
## 
##      AIC     AICc      BIC 
## 3190.787 3191.941 3201.089