library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)
## Loading required package: fabletools
library(stats)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble    3.1.6     v fable     0.3.1
## v lubridate 1.8.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()     masks base::date()
## x dplyr::filter()       masks stats::filter()
## x tsibble::intersect()  masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag()          masks stats::lag()
## x tsibble::setdiff()    masks base::setdiff()
## x tsibble::union()      masks base::union()
library(seasonal)
## 
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
## 
##     view

q.9.1.a

The difference in these figures is the upper and lower bands of the ACF limits. The more expansive the dataset the smaller the limits as the formula is +/-2/(T^.5). Since the visualizations are showing increasingly larger datasets this makes sense. However, they all indicate that the datasets are whitenoise as there are no violations of the ACF that are greater than 5%.

q.9.1.b

Critical values are at different distances from the mean of zero because by definition a white noise time series is a sequence of random numbers that cannot be predicted. The larger the data set the larger the distribution of random numbers, and because the collection of numbers is “random” critical values will vary among the three data sets.

daily_close_amzn = gafa_stock %>% filter(Symbol=='AMZN') %>% select(Close)

daily_close_amzn %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Close`

daily_close_amzn %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Close`
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

daily_close_amzn %>% PACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Close`
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

The initial autoplot indicates that there are non proportional changes over time with influences by underlying patterns. Given this is a technology stock that focuses on wholesales we can infer that these changes are seasonal in nature and sales are affected by seasonality. Seasonality cannot be modeled in an ARIMA model as non stationary data cannot be converted to stationary without removing the seasonal component.

The ACF and PACF further solidify that the data is not stationary as the limits are violated. In the ACF all limits are violated with a downward trend over time indicating non stationary data. In the PACF there are 4-5 violations of the limits also indicating non stationary data.

turkish_gdp = global_economy %>% filter(Country=='Turkey') %>% select(GDP)

turkish_gdp_acf = turkish_gdp %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = GDP`
turkish_gdp_acf_dif = turkish_gdp %>% ACF(difference(GDP)) %>% autoplot()

turkish_gdp_lambda = turkish_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

turkish_gdp_acf_dif_boxcox = turkish_gdp %>% ACF(difference(box_cox(GDP,turkish_gdp_lambda))) %>% autoplot()

turkish_gdp_acf

turkish_gdp_acf_dif

turkish_gdp_acf_dif_boxcox #best solution

aus_accommodation_tasmania = aus_accommodation %>% filter(State=='Tasmania') %>% select(Takings)



aus_accommodation_tasmania_acf = aus_accommodation_tasmania %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Takings`
aus_accommodation_tasmania_acf_dif = aus_accommodation_tasmania %>% ACF(difference(difference(Takings,4),1)) %>% autoplot()

aus_accommodation_tasmania_lambda = aus_accommodation_tasmania %>%
  features(Takings, features = guerrero) %>%
  pull(lambda_guerrero)

aus_accommodation_tasmania_acf_dif_boxcox = aus_accommodation_tasmania %>% ACF(difference(difference(box_cox(Takings,aus_accommodation_tasmania_lambda),4),1)) %>% autoplot()

aus_accommodation_tasmania_acf_log_dif = aus_accommodation_tasmania %>% ACF(difference(difference(log(Takings),4),1)) %>% autoplot()


aus_accommodation_tasmania_acf

aus_accommodation_tasmania_acf_dif # best solution

aus_accommodation_tasmania_acf_log_dif

aus_accommodation_tasmania_acf_dif_boxcox

souvenirs_data = souvenirs



souvenirs_data_acf = souvenirs_data %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Sales`
souvenirs_data_acf_dif = souvenirs_data %>% ACF(difference(difference(Sales,12),1)) %>% autoplot()

souvenirs_data_lambda = souvenirs_data %>%
  features(Sales, features = guerrero) %>%
  pull(lambda_guerrero)

souvenirs_data_acf_dif_boxcox = souvenirs_data %>% ACF(difference(difference(box_cox(Sales,souvenirs_data_lambda),12),1)) %>% autoplot()

souvenirs_data_acf_log_dif = souvenirs_data %>% ACF(difference(difference(log(Sales),12),1)) %>% autoplot()


souvenirs_data_acf

souvenirs_data_acf_dif 

souvenirs_data_acf_dif_boxcox # best solution

souvenirs_data_acf_log_dif

set.seed(45678913)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_data = myseries %>% select(Month,Turnover)


myseries_data_plot = myseries_data %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
myseries_data_acf = myseries_data %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Turnover`
myseries_data_acf_dif = myseries_data %>% ACF(difference(difference(Turnover,12),1)) %>% autoplot()

myseries_data_lambda = myseries_data %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

myseries_data_acf_dif_boxcox = myseries_data %>% ACF(difference(difference(box_cox(Turnover,myseries_data_lambda),12),1)) %>% autoplot()

myseries_data_acf_log_dif = myseries_data %>% ACF(difference(difference(log(Turnover),12),1)) %>% autoplot()

myseries_data_acf

myseries_data_acf_dif

myseries_data_acf_dif_boxcox #best solution

myseries_data_acf_log_dif

A box cox transformation with a second order difference of 12,1 achieved the best result though the stationarity is iffy in this situation.

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim_1plt = sim %>% autoplot() + ggtitle("phi=0.6")
## Plot variable not specified, automatically selected `.vars = y`
sim_1acf =  sim %>% ACF() %>% autoplot() + ggtitle("phi=0.6")
## Response variable not specified, automatically selected `var = y`
sim_1plt

sim_1acf

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.1*y[i-1] + e[i]
sim_2 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.2*y[i-1] + e[i]
sim_3 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.3*y[i-1] + e[i]
sim_4 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.4*y[i-1] + e[i]
sim_5 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.5*y[i-1] + e[i]
sim_6 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.7*y[i-1] + e[i]
sim_7 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.8*y[i-1] + e[i]
sim_8 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.9*y[i-1] + e[i]
sim_9 <- tsibble(idx = seq_len(100), y = y, index = idx)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 1*y[i-1] + e[i]
sim_10 <- tsibble(idx = seq_len(100), y = y, index = idx)


sim_2plt = sim_2 %>% autoplot() + ggtitle("phi=0.1")
## Plot variable not specified, automatically selected `.vars = y`
sim_2acf =  sim_2 %>% ACF() %>% autoplot() + ggtitle("phi=0.1")
## Response variable not specified, automatically selected `var = y`
sim_3plt = sim_3 %>% autoplot() + ggtitle("phi=0.2")
## Plot variable not specified, automatically selected `.vars = y`
sim_3acf =  sim_3 %>% ACF() %>% autoplot() + ggtitle("phi=0.2")
## Response variable not specified, automatically selected `var = y`
sim_4plt = sim_4 %>% autoplot() + ggtitle("phi=0.3")
## Plot variable not specified, automatically selected `.vars = y`
sim_4acf =  sim_4 %>% ACF() %>% autoplot() + ggtitle("phi=0.3")
## Response variable not specified, automatically selected `var = y`
sim_5plt = sim_5 %>% autoplot() + ggtitle("phi=0.4")
## Plot variable not specified, automatically selected `.vars = y`
sim_5acf =  sim_5 %>% ACF() %>% autoplot() + ggtitle("phi=0.4")
## Response variable not specified, automatically selected `var = y`
sim_6plt = sim_6 %>% autoplot() + ggtitle("phi=0.5")
## Plot variable not specified, automatically selected `.vars = y`
sim_6acf =  sim_6 %>% ACF() %>% autoplot() + ggtitle("phi=0.5")
## Response variable not specified, automatically selected `var = y`
sim_7plt = sim_7 %>% autoplot() + ggtitle("phi=0.7")
## Plot variable not specified, automatically selected `.vars = y`
sim_7acf =  sim_7 %>% ACF() %>% autoplot() + ggtitle("phi=0.7")
## Response variable not specified, automatically selected `var = y`
sim_8plt = sim_8 %>% autoplot() + ggtitle("phi=0.8")
## Plot variable not specified, automatically selected `.vars = y`
sim_8acf =  sim_8 %>% ACF() %>% autoplot() + ggtitle("phi=0.8")
## Response variable not specified, automatically selected `var = y`
sim_9plt = sim_9 %>% autoplot() + ggtitle("phi=0.9")
## Plot variable not specified, automatically selected `.vars = y`
sim_9acf =  sim_9 %>% ACF() %>% autoplot() + ggtitle("phi=0.9")
## Response variable not specified, automatically selected `var = y`
sim_10plt = sim_10 %>% autoplot() + ggtitle("phi=1")
## Plot variable not specified, automatically selected `.vars = y`
sim_10acf =  sim_10 %>% ACF() %>% autoplot() + ggtitle("phi=1")
## Response variable not specified, automatically selected `var = y`
sim_2plt

sim_2acf

sim_3plt

sim_3acf

sim_4plt

sim_4acf

sim_5plt

sim_5acf

sim_6plt

sim_6acf

sim_1plt

sim_1acf

sim_7plt

sim_7acf

sim_8plt

sim_8acf

sim_9plt

sim_9acf

sim_10plt

sim_10acf

y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
  y_rewrite[i] <- 0.6*e_rewrite[i-1] + e_rewrite[i]
y_rewrite %>% autoplot() + ggtitle("phi=.6")

as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.6")
## Response variable not specified, automatically selected `var = value`

y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
  y_rewrite[i] <- 0.3*e_rewrite[i-1] + e_rewrite[i]

y_rewrite %>% autoplot() + ggtitle("phi=.3")

as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.3")
## Response variable not specified, automatically selected `var = value`

y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
  y_rewrite[i] <- 0.8*e_rewrite[i-1] + e_rewrite[i]

y_rewrite %>% autoplot() + ggtitle("phi=.8")

as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.8")
## Response variable not specified, automatically selected `var = value`

y_t_p <- ts(numeric(500))
e_t_p <- rnorm(500)
for(i in 2:500)
  y_t_p[i] <- 0.6*e_t_p[i-1] + 0.6*y_t_p[i-1] + e_t_p[i]

y_t_p %>% autoplot()

as_tsibble(y_t_p) %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = value`

y_t_p <- ts(numeric(500))
e_t_p <- rnorm(500)
for(i in 2:500)
  y_t_p[i] <- 0.8*y_t_p[i-1] + 0.3*y_t_p[i-1] + e_t_p[i]

y_t_p %>% autoplot()

as_tsibble(y_t_p) %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = value`

afit = aus_airpassengers %>% model(search = ARIMA(Passengers,stepwise=FALSE))
report(afit)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65
glance(afit)
## # A tibble: 1 x 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 search   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
afit %>% gg_tsresiduals()

afit %>% forecast(h=10) %>% autoplot(aus_airpassengers)

9.7.b

\[ (1-B)^2y_t = c + (1+\theta_1B)e_t \]

afit_arima010 = aus_airpassengers %>% model(arima010 = ARIMA(Passengers~pdq(0,1,0),stepwise=FALSE))
report(afit_arima010)
## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.4191
## s.e.    0.3014
## 
## sigma^2 estimated as 4.271:  log likelihood=-98.16
## AIC=200.31   AICc=200.59   BIC=203.97
glance(afit_arima010)
## # A tibble: 1 x 8
##   .model   sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arima010   4.27   -98.2  200.  201.  204. <cpl [0]> <cpl [0]>
afit_arima010 %>% gg_tsresiduals()

afit_arima010 %>% forecast(h=10) %>% autoplot(aus_airpassengers)

afit_arima212 = aus_airpassengers %>% model(arima212 = ARIMA(Passengers~1+pdq(2,1,2),stepwise=FALSE))
report(afit_arima212)
## Series: Passengers 
## Model: ARIMA(2,1,2) w/ drift 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2  constant
##       -0.5518  -0.7327  0.5895  1.0000    3.2216
## s.e.   0.1684   0.1224  0.0916  0.1008    0.7224
## 
## sigma^2 estimated as 4.031:  log likelihood=-96.23
## AIC=204.46   AICc=206.61   BIC=215.43
glance(afit_arima212)
## # A tibble: 1 x 8
##   .model   sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arima212   4.03   -96.2  204.  207.  215. <cpl [2]> <cpl [2]>
afit_arima212 %>% gg_tsresiduals()

afit_arima212 %>% forecast(h=10) %>% autoplot(aus_airpassengers)

afit_arima212 = aus_airpassengers %>% model(arima212 = ARIMA(Passengers~0+pdq(2,1,2),stepwise=FALSE))
## Warning: 1 error encountered for arima212
## [1] non-stationary AR part from CSS
report(afit_arima212)
## Series: Passengers 
## Model: NULL model 
## NULL model
afit_arima021 = aus_airpassengers %>% model(arima021 = ARIMA(Passengers~pdq(0,2,1),stepwise=FALSE))
report(afit_arima021)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65
glance(afit_arima021)
## # A tibble: 1 x 8
##   .model   sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arima021   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
afit_arima021 %>% gg_tsresiduals()

afit_arima021 %>% forecast(h=10) %>% autoplot(aus_airpassengers)

us_gdp = global_economy %>% filter(Country=='United States') %>% select(GDP)

us_gdpplot = us_gdp %>% autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`
us_gdp_acf =  us_gdp %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = GDP`
us_gdp_acf_dif = us_gdp %>% ACF(difference(GDP)) %>% autoplot()

us_gdp_lambda = us_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

us_gdp_acf_dif_boxcox = us_gdp %>% ACF(difference(box_cox(GDP,us_gdp_lambda))) %>% autoplot()

us_gdpplot

us_gdp_acf

us_gdp_acf_dif

us_gdp_acf_dif_boxcox #best transformation

us_gdp <- us_gdp %>%
  mutate(GDP = difference(box_cox(GDP,us_gdp_lambda)))

usfit_arima_seach = us_gdp %>% model(search = ARIMA(GDP,stepwise=FALSE))
report(usfit_arima_seach)
## Series: GDP 
## Model: ARIMA(1,0,0) w/ mean 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.1789
## s.e.  0.1198    9.5047
## 
## sigma^2 estimated as 5381:  log likelihood=-325.32
## AIC=656.65   AICc=657.09   BIC=662.83
glance(usfit_arima_seach)
## # A tibble: 1 x 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 search  5381.   -325.  657.  657.  663. <cpl [1]> <cpl [0]>
usfit_arima_seach %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

usfit_arima_seach %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).

usfit_arima_1 = us_gdp %>% model(search = ARIMA(GDP~pdq(0,2,0),stepwise=FALSE))
report(usfit_arima_1)
## Series: GDP 
## Model: ARIMA(0,2,0) 
## 
## sigma^2 estimated as 18100:  log likelihood=-348.14
## AIC=698.28   AICc=698.35   BIC=700.3
glance(usfit_arima_1)
## # A tibble: 1 x 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 search 18100.   -348.  698.  698.  700. <cpl [0]> <cpl [0]>
usfit_arima_1 %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

usfit_arima_1 %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).

usfit_arima_2 = us_gdp %>% model(search = ARIMA(GDP~1+pdq(2,1,2),stepwise=FALSE))
report(usfit_arima_2)
## Series: GDP 
## Model: ARIMA(2,1,2) w/ drift 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2  constant
##       0.9874  -0.4125  -1.4710  0.6307    0.7148
## s.e.  0.3968   0.2832   0.3599  0.3771    1.6666
## 
## sigma^2 estimated as 5850:  log likelihood=-320.64
## AIC=653.27   AICc=654.95   BIC=665.53
glance(usfit_arima_2)
## # A tibble: 1 x 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 search  5850.   -321.  653.  655.  666. <cpl [2]> <cpl [2]>

Best Model

usfit_arima_2 %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

usfit_arima_2 %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).

no_trans_us_gdp = global_economy %>% filter(Country=='United States') %>% select(GDP)

no_trans_us_gdp %>% model(ETS(GDP))%>% gg_tsresiduals()

no_trans_us_gdp %>% model(ETS(GDP)) %>% forecast(h=10) %>% autoplot(no_trans_us_gdp)