library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)
## Loading required package: fabletools
library(stats)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.6 v fable 0.3.1
## v lubridate 1.8.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
The difference in these figures is the upper and lower bands of the ACF limits. The more expansive the dataset the smaller the limits as the formula is +/-2/(T^.5). Since the visualizations are showing increasingly larger datasets this makes sense. However, they all indicate that the datasets are whitenoise as there are no violations of the ACF that are greater than 5%.
Critical values are at different distances from the mean of zero because by definition a white noise time series is a sequence of random numbers that cannot be predicted. The larger the data set the larger the distribution of random numbers, and because the collection of numbers is “random” critical values will vary among the three data sets.
daily_close_amzn = gafa_stock %>% filter(Symbol=='AMZN') %>% select(Close)
daily_close_amzn %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Close`
daily_close_amzn %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Close`
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
daily_close_amzn %>% PACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Close`
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
The initial autoplot indicates that there are non proportional changes over time with influences by underlying patterns. Given this is a technology stock that focuses on wholesales we can infer that these changes are seasonal in nature and sales are affected by seasonality. Seasonality cannot be modeled in an ARIMA model as non stationary data cannot be converted to stationary without removing the seasonal component.
The ACF and PACF further solidify that the data is not stationary as the limits are violated. In the ACF all limits are violated with a downward trend over time indicating non stationary data. In the PACF there are 4-5 violations of the limits also indicating non stationary data.
turkish_gdp = global_economy %>% filter(Country=='Turkey') %>% select(GDP)
turkish_gdp_acf = turkish_gdp %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = GDP`
turkish_gdp_acf_dif = turkish_gdp %>% ACF(difference(GDP)) %>% autoplot()
turkish_gdp_lambda = turkish_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
turkish_gdp_acf_dif_boxcox = turkish_gdp %>% ACF(difference(box_cox(GDP,turkish_gdp_lambda))) %>% autoplot()
turkish_gdp_acf
turkish_gdp_acf_dif
turkish_gdp_acf_dif_boxcox #best solution
aus_accommodation_tasmania = aus_accommodation %>% filter(State=='Tasmania') %>% select(Takings)
aus_accommodation_tasmania_acf = aus_accommodation_tasmania %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Takings`
aus_accommodation_tasmania_acf_dif = aus_accommodation_tasmania %>% ACF(difference(difference(Takings,4),1)) %>% autoplot()
aus_accommodation_tasmania_lambda = aus_accommodation_tasmania %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
aus_accommodation_tasmania_acf_dif_boxcox = aus_accommodation_tasmania %>% ACF(difference(difference(box_cox(Takings,aus_accommodation_tasmania_lambda),4),1)) %>% autoplot()
aus_accommodation_tasmania_acf_log_dif = aus_accommodation_tasmania %>% ACF(difference(difference(log(Takings),4),1)) %>% autoplot()
aus_accommodation_tasmania_acf
aus_accommodation_tasmania_acf_dif # best solution
aus_accommodation_tasmania_acf_log_dif
aus_accommodation_tasmania_acf_dif_boxcox
souvenirs_data = souvenirs
souvenirs_data_acf = souvenirs_data %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Sales`
souvenirs_data_acf_dif = souvenirs_data %>% ACF(difference(difference(Sales,12),1)) %>% autoplot()
souvenirs_data_lambda = souvenirs_data %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
souvenirs_data_acf_dif_boxcox = souvenirs_data %>% ACF(difference(difference(box_cox(Sales,souvenirs_data_lambda),12),1)) %>% autoplot()
souvenirs_data_acf_log_dif = souvenirs_data %>% ACF(difference(difference(log(Sales),12),1)) %>% autoplot()
souvenirs_data_acf
souvenirs_data_acf_dif
souvenirs_data_acf_dif_boxcox # best solution
souvenirs_data_acf_log_dif
set.seed(45678913)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_data = myseries %>% select(Month,Turnover)
myseries_data_plot = myseries_data %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
myseries_data_acf = myseries_data %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = Turnover`
myseries_data_acf_dif = myseries_data %>% ACF(difference(difference(Turnover,12),1)) %>% autoplot()
myseries_data_lambda = myseries_data %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries_data_acf_dif_boxcox = myseries_data %>% ACF(difference(difference(box_cox(Turnover,myseries_data_lambda),12),1)) %>% autoplot()
myseries_data_acf_log_dif = myseries_data %>% ACF(difference(difference(log(Turnover),12),1)) %>% autoplot()
myseries_data_acf
myseries_data_acf_dif
myseries_data_acf_dif_boxcox #best solution
myseries_data_acf_log_dif
A box cox transformation with a second order difference of 12,1 achieved the best result though the stationarity is iffy in this situation.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim_1plt = sim %>% autoplot() + ggtitle("phi=0.6")
## Plot variable not specified, automatically selected `.vars = y`
sim_1acf = sim %>% ACF() %>% autoplot() + ggtitle("phi=0.6")
## Response variable not specified, automatically selected `var = y`
sim_1plt
sim_1acf
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.1*y[i-1] + e[i]
sim_2 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.2*y[i-1] + e[i]
sim_3 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.3*y[i-1] + e[i]
sim_4 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.4*y[i-1] + e[i]
sim_5 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.5*y[i-1] + e[i]
sim_6 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.7*y[i-1] + e[i]
sim_7 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.8*y[i-1] + e[i]
sim_8 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.9*y[i-1] + e[i]
sim_9 <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 1*y[i-1] + e[i]
sim_10 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim_2plt = sim_2 %>% autoplot() + ggtitle("phi=0.1")
## Plot variable not specified, automatically selected `.vars = y`
sim_2acf = sim_2 %>% ACF() %>% autoplot() + ggtitle("phi=0.1")
## Response variable not specified, automatically selected `var = y`
sim_3plt = sim_3 %>% autoplot() + ggtitle("phi=0.2")
## Plot variable not specified, automatically selected `.vars = y`
sim_3acf = sim_3 %>% ACF() %>% autoplot() + ggtitle("phi=0.2")
## Response variable not specified, automatically selected `var = y`
sim_4plt = sim_4 %>% autoplot() + ggtitle("phi=0.3")
## Plot variable not specified, automatically selected `.vars = y`
sim_4acf = sim_4 %>% ACF() %>% autoplot() + ggtitle("phi=0.3")
## Response variable not specified, automatically selected `var = y`
sim_5plt = sim_5 %>% autoplot() + ggtitle("phi=0.4")
## Plot variable not specified, automatically selected `.vars = y`
sim_5acf = sim_5 %>% ACF() %>% autoplot() + ggtitle("phi=0.4")
## Response variable not specified, automatically selected `var = y`
sim_6plt = sim_6 %>% autoplot() + ggtitle("phi=0.5")
## Plot variable not specified, automatically selected `.vars = y`
sim_6acf = sim_6 %>% ACF() %>% autoplot() + ggtitle("phi=0.5")
## Response variable not specified, automatically selected `var = y`
sim_7plt = sim_7 %>% autoplot() + ggtitle("phi=0.7")
## Plot variable not specified, automatically selected `.vars = y`
sim_7acf = sim_7 %>% ACF() %>% autoplot() + ggtitle("phi=0.7")
## Response variable not specified, automatically selected `var = y`
sim_8plt = sim_8 %>% autoplot() + ggtitle("phi=0.8")
## Plot variable not specified, automatically selected `.vars = y`
sim_8acf = sim_8 %>% ACF() %>% autoplot() + ggtitle("phi=0.8")
## Response variable not specified, automatically selected `var = y`
sim_9plt = sim_9 %>% autoplot() + ggtitle("phi=0.9")
## Plot variable not specified, automatically selected `.vars = y`
sim_9acf = sim_9 %>% ACF() %>% autoplot() + ggtitle("phi=0.9")
## Response variable not specified, automatically selected `var = y`
sim_10plt = sim_10 %>% autoplot() + ggtitle("phi=1")
## Plot variable not specified, automatically selected `.vars = y`
sim_10acf = sim_10 %>% ACF() %>% autoplot() + ggtitle("phi=1")
## Response variable not specified, automatically selected `var = y`
sim_2plt
sim_2acf
sim_3plt
sim_3acf
sim_4plt
sim_4acf
sim_5plt
sim_5acf
sim_6plt
sim_6acf
sim_1plt
sim_1acf
sim_7plt
sim_7acf
sim_8plt
sim_8acf
sim_9plt
sim_9acf
sim_10plt
sim_10acf
y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
y_rewrite[i] <- 0.6*e_rewrite[i-1] + e_rewrite[i]
y_rewrite %>% autoplot() + ggtitle("phi=.6")
as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.6")
## Response variable not specified, automatically selected `var = value`
y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
y_rewrite[i] <- 0.3*e_rewrite[i-1] + e_rewrite[i]
y_rewrite %>% autoplot() + ggtitle("phi=.3")
as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.3")
## Response variable not specified, automatically selected `var = value`
y_rewrite <- ts(numeric(500))
e_rewrite <- rnorm(500)
for(i in 2:500)
y_rewrite[i] <- 0.8*e_rewrite[i-1] + e_rewrite[i]
y_rewrite %>% autoplot() + ggtitle("phi=.8")
as_tsibble(y_rewrite) %>% ACF() %>% autoplot() + ggtitle("theta=.8")
## Response variable not specified, automatically selected `var = value`
y_t_p <- ts(numeric(500))
e_t_p <- rnorm(500)
for(i in 2:500)
y_t_p[i] <- 0.6*e_t_p[i-1] + 0.6*y_t_p[i-1] + e_t_p[i]
y_t_p %>% autoplot()
as_tsibble(y_t_p) %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = value`
y_t_p <- ts(numeric(500))
e_t_p <- rnorm(500)
for(i in 2:500)
y_t_p[i] <- 0.8*y_t_p[i-1] + 0.3*y_t_p[i-1] + e_t_p[i]
y_t_p %>% autoplot()
as_tsibble(y_t_p) %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = value`
afit = aus_airpassengers %>% model(search = ARIMA(Passengers,stepwise=FALSE))
report(afit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
glance(afit)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
afit %>% gg_tsresiduals()
afit %>% forecast(h=10) %>% autoplot(aus_airpassengers)
\[ (1-B)^2y_t = c + (1+\theta_1B)e_t \]
afit_arima010 = aus_airpassengers %>% model(arima010 = ARIMA(Passengers~pdq(0,1,0),stepwise=FALSE))
report(afit_arima010)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
glance(afit_arima010)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima010 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
afit_arima010 %>% gg_tsresiduals()
afit_arima010 %>% forecast(h=10) %>% autoplot(aus_airpassengers)
afit_arima212 = aus_airpassengers %>% model(arima212 = ARIMA(Passengers~1+pdq(2,1,2),stepwise=FALSE))
report(afit_arima212)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
glance(afit_arima212)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima212 4.03 -96.2 204. 207. 215. <cpl [2]> <cpl [2]>
afit_arima212 %>% gg_tsresiduals()
afit_arima212 %>% forecast(h=10) %>% autoplot(aus_airpassengers)
afit_arima212 = aus_airpassengers %>% model(arima212 = ARIMA(Passengers~0+pdq(2,1,2),stepwise=FALSE))
## Warning: 1 error encountered for arima212
## [1] non-stationary AR part from CSS
report(afit_arima212)
## Series: Passengers
## Model: NULL model
## NULL model
afit_arima021 = aus_airpassengers %>% model(arima021 = ARIMA(Passengers~pdq(0,2,1),stepwise=FALSE))
report(afit_arima021)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
glance(afit_arima021)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima021 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
afit_arima021 %>% gg_tsresiduals()
afit_arima021 %>% forecast(h=10) %>% autoplot(aus_airpassengers)
us_gdp = global_economy %>% filter(Country=='United States') %>% select(GDP)
us_gdpplot = us_gdp %>% autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`
us_gdp_acf = us_gdp %>% ACF() %>% autoplot()
## Response variable not specified, automatically selected `var = GDP`
us_gdp_acf_dif = us_gdp %>% ACF(difference(GDP)) %>% autoplot()
us_gdp_lambda = us_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
us_gdp_acf_dif_boxcox = us_gdp %>% ACF(difference(box_cox(GDP,us_gdp_lambda))) %>% autoplot()
us_gdpplot
us_gdp_acf
us_gdp_acf_dif
us_gdp_acf_dif_boxcox #best transformation
us_gdp <- us_gdp %>%
mutate(GDP = difference(box_cox(GDP,us_gdp_lambda)))
usfit_arima_seach = us_gdp %>% model(search = ARIMA(GDP,stepwise=FALSE))
report(usfit_arima_seach)
## Series: GDP
## Model: ARIMA(1,0,0) w/ mean
##
## Coefficients:
## ar1 constant
## 0.4586 118.1789
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5381: log likelihood=-325.32
## AIC=656.65 AICc=657.09 BIC=662.83
glance(usfit_arima_seach)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 5381. -325. 657. 657. 663. <cpl [1]> <cpl [0]>
usfit_arima_seach %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
usfit_arima_seach %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).
usfit_arima_1 = us_gdp %>% model(search = ARIMA(GDP~pdq(0,2,0),stepwise=FALSE))
report(usfit_arima_1)
## Series: GDP
## Model: ARIMA(0,2,0)
##
## sigma^2 estimated as 18100: log likelihood=-348.14
## AIC=698.28 AICc=698.35 BIC=700.3
glance(usfit_arima_1)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 18100. -348. 698. 698. 700. <cpl [0]> <cpl [0]>
usfit_arima_1 %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
usfit_arima_1 %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).
usfit_arima_2 = us_gdp %>% model(search = ARIMA(GDP~1+pdq(2,1,2),stepwise=FALSE))
report(usfit_arima_2)
## Series: GDP
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## 0.9874 -0.4125 -1.4710 0.6307 0.7148
## s.e. 0.3968 0.2832 0.3599 0.3771 1.6666
##
## sigma^2 estimated as 5850: log likelihood=-320.64
## AIC=653.27 AICc=654.95 BIC=665.53
glance(usfit_arima_2)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 5850. -321. 653. 655. 666. <cpl [2]> <cpl [2]>
usfit_arima_2 %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
usfit_arima_2 %>% forecast(h=10) %>% autoplot(us_gdp)
## Warning: Removed 1 row(s) containing missing values (geom_path).
no_trans_us_gdp = global_economy %>% filter(Country=='United States') %>% select(GDP)
no_trans_us_gdp %>% model(ETS(GDP))%>% gg_tsresiduals()
no_trans_us_gdp %>% model(ETS(GDP)) %>% forecast(h=10) %>% autoplot(no_trans_us_gdp)