\[9.1\]
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
\[9.2\]
gafa_stock %>%
distinct(Symbol)
amazon <- gafa_stock %>%
filter(Symbol == "AMZN")
p1_a <- amazon %>%
autoplot(Close) +
labs(title="Daily Closing Stock Price (Amazon) ")
p2_a <- amazon %>%
ACF(Close) %>%
autoplot() + labs(title="Correlation of Daily Closing Stock Price (Amazon) ")
## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.
p3_a <- amazon %>%
PACF(Close) %>%
autoplot() + labs(title="Partial Autocorrelation Daily Closing Stock Price (Amazon) ")
## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.
grid.arrange(p1_a, p2_a, p3_a, nrow = 2)
## From the plots: we clearly see a trend in the data and this is also
evident in the ACF plot, we see a slow decrease as the lags increase
suggesting a trend, the series is non-stationary. Additionally, the
large initial spike in the PACF plot also indicates that this data is
not stationary, therefore it should be differenced to make it
stationary.
\[9.3\]
#a.
turk <- global_economy %>%
filter(Country == "Turkey")
p1_t <- turk %>%
autoplot(GDP) +
labs(title="Turkey GDP")
p2_t <- turk %>%
ACF(GDP) %>%
autoplot() + labs(title="Correlation of Turkey GDP ")
p3_t <- turk %>%
PACF(GDP) %>%
autoplot() + labs(title="Partial Autocorrelation of Turkey GDP ")
grid.arrange(p1_t, p2_t, p3_t, nrow = 2)
## The Turkish GDP data is non-stationary. We’ll make the appropriate
Box-Cox transformations and order of differencing in order to obtain
stationary data.
lambda_t <- turk %>%
features(GDP,features=guerrero) %>%
pull(lambda_guerrero)
lambda_t
## [1] 0.1572187
# The guerrero feature suggests a transformation with lamda 0.16.
turk %>%
features(box_cox(GDP, lambda_t), unitroot_ndiffs)
p4_t <- turk %>%
autoplot(difference(box_cox(GDP, lambda_t), 1)) +
labs(title="Turkey GDP After Transformation")
p5_t <- turk %>%
ACF(difference(box_cox(GDP, lambda_t), 1)) %>%
autoplot() + labs(title="Correlation of Turkey GDP After Transformation ")
p6_t <- turk %>%
PACF(difference(box_cox(GDP, lambda_t), 1)) %>%
autoplot() + labs(title="Partial Autocorrelation of Turkey GDP After Transformation ")
grid.arrange(p4_t, p5_t, p6_t, nrow = 2)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
#b.
tas <- aus_accommodation %>%
filter(State == "Tasmania")
p1_ta <- tas %>%
autoplot(Takings) +
labs(title="Accomodation Takings in Tasmania")
p2_ta <- tas %>%
ACF(Takings) %>%
autoplot() + labs(title="Correlation of Takings in Tasmania ")
p3_ta <- tas %>%
PACF(Takings) %>%
autoplot() + labs(title="Partial Autocorrelation of Takings in Tasmania ")
grid.arrange(p1_ta, p2_ta, p3_ta, nrow = 2)
## We may need to apply unitroot_nsdiffs() to the quarterly Tasmanian
takings data in order to determine if we need any seasonal
differencing.
lambda_ta <- tas %>%
features(Takings,features=guerrero) %>%
pull(lambda_guerrero)
lambda_ta
## [1] 0.001819643
tas %>%
features(box_cox(Takings, lambda_ta), unitroot_nsdiffs)
tas %>%
features(difference(box_cox(Takings, lambda_ta), 4), unitroot_ndiffs)
p4_ta <- tas %>%
autoplot(difference(box_cox(Takings, lambda_ta), 4)) +
labs(title="Accomodation Takings in Tasmania After Transformation")
p5_ta <- tas %>%
ACF(difference(box_cox(Takings, lambda_ta), 4)) %>%
autoplot() + labs(title="Correlation of Takings After Tranformation ")
p6_ta <- tas %>%
PACF(difference(box_cox(Takings, lambda_ta), 4)) %>%
autoplot() + labs(title="Partial Autocorrelation of Takings After Transformation ")
grid.arrange(p4_ta, p5_ta, p6_ta, nrow = 2)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
#c.
p1_s <- souvenirs %>%
autoplot(Sales) +
labs(title="Monthly Sales")
p2_s <- souvenirs %>%
ACF(Sales) %>%
autoplot() + labs(title="Correlation of Monthly Sales ")
p3_s <- souvenirs %>%
PACF(Sales) %>%
autoplot() + labs(title="Partial Autocorrelation of Monthly Sales ")
grid.arrange(p1_s, p2_s, p3_s, nrow = 2)
## We may need to apply unitroot_nsdiffs() to the monthly sales data in
order to determine if we need any seasonal differencing.
lambda_s <- souvenirs %>%
features(Sales,features=guerrero) %>%
pull(lambda_guerrero)
lambda_s
## [1] 0.002118221
souvenirs %>%
features(box_cox(Sales, lambda_s), unitroot_nsdiffs)
souvenirs %>%
features(difference(box_cox(Sales, lambda_s), 12), unitroot_ndiffs)
p4_s <- souvenirs %>%
autoplot(difference(box_cox(Sales, lambda_s), 12)) +
labs(title="Monthly Sales After Transformation")
p5_s <- souvenirs %>%
ACF(difference(box_cox(Sales, lambda_s), 12)) %>%
autoplot() + labs(title="Correlation of Monthly Sales After Tranformation ")
p6_s <- souvenirs %>%
PACF(difference(box_cox(Sales, lambda_s), 12)) %>%
autoplot() + labs(title="Partial Autocorrelation of Monthly Sales After Transformation ")
grid.arrange(p4_s, p5_s, p6_s, nrow = 2)
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
\[9.5\]
set.seed(420)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
p1_m <- myseries %>%
autoplot(Turnover) +
labs(title="Monthly Turnover")
p2_m <- myseries %>%
ACF(Turnover) %>%
autoplot() + labs(title="Correlation of Monthly Turnover ")
p3_m <- myseries %>%
PACF(Turnover) %>%
autoplot() + labs(title="Partial Autocorrelation of Monthly Turnover ")
grid.arrange(p1_m, p2_m, p3_m, nrow = 2)
lambda_m <- myseries %>%
features(Turnover,features=guerrero) %>%
pull(lambda_guerrero)
lambda_m
## [1] 0.1409206
myseries %>%
features(box_cox(Turnover, lambda_m), unitroot_nsdiffs)
myseries %>%
features(difference(box_cox(Turnover, lambda_m), 12), unitroot_ndiffs)
p4_m <- myseries %>%
autoplot(difference(box_cox(Turnover, lambda_m), 12)) +
labs(title="Monthly Turnover After Transformation")
p5_m <- myseries %>%
ACF(difference(box_cox(Turnover, lambda_m), 12)) %>%
autoplot() + labs(title="Correlation of Monthly Turnover After Tranformation ")
p6_m <- myseries %>%
PACF(difference(box_cox(Turnover, lambda_m), 12)) %>%
autoplot() + labs(title="Partial Autocorrelation of Monthly Turnover After Transformation ")
grid.arrange(p4_m, p5_m, p6_m, nrow = 2)
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## It does not seem that we succeeded on making the data stationary,
even after making the necessary changes.
\[9.6\]
#a.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
#b.
sim %>% autoplot(y)
set.seed(123)
y2 <- numeric(100)
y3 <- numeric(100)
e2 <- rnorm(100)
e3 <- rnorm(100)
for(i in 2:100){
y2[i] <- 0.1*y2[i-1] + e2[i]
y3[i] <- 0.9*y3[i-1] + e3[i]
}
sim <- tsibble(idx = seq_len(100), y = y, y2 = y2, y3 = y3, index = idx)
plot1 <- sim %>% autoplot(y2) + labs( title = "Phi = 0.1")
plot2 <- sim %>% autoplot(y) + labs( title = "Phi = 0.6")
plot3 <- sim %>% autoplot(y3) + labs( title = "Phi = 0.9")
grid.arrange(plot1, plot2, plot3, nrow = 2)
## as the ϕ1 decreases or approaches 0, yt starts to be equivalent to
white noise
#c.
set.seed(123)
y4 <- numeric(100)
e4 <- rnorm(100)
for(i in 2:100)
y4[i] <- 0.6*e4[i-1] + e4[i]
sim2 <- tsibble(idx = seq_len(100), y4 = y4, index = idx)
#d.
set.seed(123)
set.seed(123)
y5 <- numeric(100)
y6 <- numeric(100)
e5 <- rnorm(100)
e6 <- rnorm(100)
for(i in 2:100){
y5[i] <- 0.1*e5[i-1] + e5[i]
y6[i] <- 0.9*e6[i-1] + e6[i]
}
sim2 <- tsibble(idx = seq_len(100), y4 = y4, y5 = y5, y6 = y6, index = idx)
plot4 <- sim2 %>% autoplot(y5) + labs( title = "Theta = 0.1")
plot5 <- sim2 %>% autoplot(y4) + labs( title = "Theta = 0.6")
plot6 <- sim2 %>% autoplot(y6) + labs( title = "Theta = 0.9")
grid.arrange(plot4, plot5, plot6, nrow = 2)
## Most recent observations have higher weight than observations from
the more distant past. Additionally, only the scale in the series
changes, not the patterns.
#e.
set.seed(123)
y7 <- (numeric(100))
e7 <- rnorm(100)
for(i in 2:100)
y7[i] <- 0.6*y7[i-1] + 0.6*e7[i-1] + e7[i]
sim3 <- tsibble(idx = seq_len(100), y7 = y7, index = idx)
#f.
set.seed(123)
y8 <- (numeric(100))
e8 <- rnorm(100)
for(i in 3:100)
y8[i] <- -0.8*y8[i-1] + 0.3*y8[i-2] + e8[i]
sim4 <- tsibble(idx = seq_len(100), y8 = y8, index = idx)
#g.
plot7 <- sim3 %>% autoplot(y7) + labs( title = "ARMA(1,1) Phi = 0.6 and Theta = 0.6")
plot8 <- sim4 %>% autoplot(y8) + labs( title = "AR(2) Phi1 = -0.8 and Phi2 = 0.3")
grid.arrange(plot7, plot8, nrow = 2)
## The ARMA(1,1) model seems to be approaching stationarity. Perhaps by
decreasing Phi we could achieve this. The AR(2) model has negative
coefficient of -.8 which will cause the first term to alternate between
a positive and negative value. We can also observe that the AR(2) model
shows larger values as time progresses due to the Phi2 term. \[9.7\]
autoplot(aus_airpassengers)
## Plot variable not specified, automatically selected `.vars = Passengers`
#a.
fit <- aus_airpassengers %>%
model(ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
fit %>% gg_tsresiduals()
fit %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
## b. ARIMA(0,2,1): ((1−B)^2)Yt=(1+(Θ1)B)ϵt
#c.
fit2 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ 1 + pdq(0,1,0)))
report(fit2)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
fit2 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
## The forecasts look very similar, upward trending
#d.
fit3 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ 1 + pdq(2,1,2)))
report(fit3)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
fit3 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
fit4 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ 0 + pdq(2,1,2)))
## Warning: 1 error encountered for ARIMA(Passengers ~ 0 + pdq(2, 1, 2))
## [1] non-stationary AR part from CSS
report(fit4)
## Series: Passengers
## Model: NULL model
## NULL model
#e.
fit5 <- aus_airpassengers %>%
model(ARIMA(Passengers ~ 1 + pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(fit5)
## Series: Passengers
## Model: ARIMA(0,2,1) w/ poly
##
## Coefficients:
## ma1 constant
## -1.0000 0.0571
## s.e. 0.0585 0.0213
##
## sigma^2 estimated as 3.855: log likelihood=-95.1
## AIC=196.21 AICc=196.79 BIC=201.63
fit5 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
## We can see that the line has become steeper and forecasts are higher
than seen in previous graphs.
\[9.8\]
us <- global_economy %>%
filter(Country == "United States")
autoplot(us)
## Plot variable not specified, automatically selected `.vars = GDP`
#a.
lambda_us <- us %>%
features(GDP,features=guerrero) %>%
pull(lambda_guerrero)
lambda_us
## [1] 0.2819443
us %>% autoplot(box_cox(GDP, lambda_us))
## transformation has helped straighten the line.
#b.
fit_us <- us %>%
model(ARIMA(box_cox(GDP, lambda_us)))
report(fit_us)
## Series: GDP
## Model: ARIMA(1,1,0) w/ drift
## Transformation: box_cox(GDP, lambda_us)
##
## Coefficients:
## ar1 constant
## 0.4586 118.1822
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5479: log likelihood=-325.32
## AIC=656.65 AICc=657.1 BIC=662.78
#c.
fit_others <- us %>%
model(
"ARIMA(2,1,2)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(2,1,2)),
"ARIMA(0,1,0)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(0,1,0)),
"ARIMA(1,1,0)" = ARIMA(box_cox(GDP, lambda_us) ~ 1 + pdq(1,1,0))
)
glance(fit_others) %>%
arrange (AIC) %>%
select(.model:BIC)
#d.
fit_us %>% gg_tsresiduals()
## The residuals are distributed evenly on the time plot. We can also
observe that the residuals look like white noise on the ACF plot.
#e.
fit_us %>%
forecast(h=5) %>%
autoplot(us)
#f.
fit_ets <- us %>%
model(ETS(GDP))
fit_ets %>%
forecast(h=5) %>%
autoplot(us)
## We can observe that the prediction intervals for the ETS() model are
a lot wider than for the ARIMA() model. And if we look at the model
report, we can also realize that the AIC is a lot larger for the ETS()
model.
report(fit_ets)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089