Yes they all show that the data are white noise because all of the spikes on all 3 graphs fall within the dashed ACF plot. The main difference is the length of the time-series.
Spikes in the ACF to lie within ±2/sqrt(T) where T is the length of the time series. So the main difference in this case the length of the time series which produces different sized bands of ACF plots.
The stock prices in the basic time-plot are clearly not stationary over time with a relatively steady trend upward. The ACF slowly decreases over time which is an indication of a series that needs differencing. Additionally the PACF indicates that the closing price is predictable using lagged values which indicates it isn’t stationary.
am<-gafa_stock%>%filter(Symbol=="AMZN")
autoplot(am,Close)+
labs(title = "Daily Closing Price of Amazon",
y = "Closing Price $USD")
am %>%
ACF(Close) %>% autoplot()+ labs(title = "ACF White noise")
am %>%
PACF(Close) %>% autoplot()+ labs(title = "PACF White noise")
It seems to be following an exponential function and needs a first difference transformation.
tu<-global_economy%>%filter(Country=="Turkey")
autoplot(tu,GDP)
tu %>%
features(GDP, unitroot_ndiffs)
## # A tibble: 1 x 2
## Country ndiffs
## <fct> <int>
## 1 Turkey 1
tu %>%
features(GDP, unitroot_nsdiffs)
## # A tibble: 1 x 2
## Country nsdiffs
## <fct> <int>
## 1 Turkey 0
tu %>%
features(GDP, unitroot_kpss)
## # A tibble: 1 x 3
## Country kpss_stat kpss_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 1.22 0.01
tu %>%
mutate(diff_close = difference(GDP)) %>%
features(diff_close, unitroot_kpss)
## # A tibble: 1 x 3
## Country kpss_stat kpss_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 0.386 0.0834
lambda <- tu %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
tu %>%
autoplot(box_cox(difference(GDP), lambda)) +
labs(title = "Single Difference and Box-Cox Transformed GDP of Turkey with Lambda=-0.157")
It looks like there are seasonal differences along with a slight upward trend. So we will need a seasonal difference as well as first difference. In this case we will not box cox transform.
tu<-aus_accommodation%>%filter(State=="Tasmania")
autoplot(tu,Takings)
tu %>%
features(Takings, unitroot_ndiffs)
## # A tibble: 1 x 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 1
tu %>%
features(Takings, unitroot_nsdiffs)
## # A tibble: 1 x 2
## State nsdiffs
## <chr> <int>
## 1 Tasmania 1
tu %>%
features(Takings, unitroot_kpss)
## # A tibble: 1 x 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 1.88 0.01
tu %>%
mutate(diff_close = difference(Takings)) %>%
features(diff_close, unitroot_kpss)
## # A tibble: 1 x 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 0.231 0.1
lambda <- tu %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
tu %>%
autoplot(difference(difference(Takings))) +
labs(title = "Doubly Differenced Tasmanian Takings")
There seems to be seasonality and the tests bare that out so we will doubly difference this time series to make it stationary.
tu<-souvenirs
autoplot(tu,Sales)
tu %>%
features(Sales, unitroot_ndiffs)
## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
tu %>%
features(Sales, unitroot_nsdiffs)
## # A tibble: 1 x 1
## nsdiffs
## <int>
## 1 1
tu %>%
features(Sales, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.31 0.01
tu %>%
mutate(diff_close = difference(Sales)) %>%
features(diff_close, unitroot_kpss)
## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.240 0.1
lambda <- tu %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
tu %>%
autoplot(box_cox(difference(difference(Sales)),lambda)) +
labs(title = "Doubly Differenced Monthly Sales")
set.seed(19865)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
tu<-myseries
autoplot(tu,Turnover)
tu %>%
features(Turnover, unitroot_ndiffs)
## # A tibble: 1 x 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 South Australia Other retailing 1
tu %>%
features(Turnover, unitroot_nsdiffs)
## # A tibble: 1 x 3
## State Industry nsdiffs
## <chr> <chr> <int>
## 1 South Australia Other retailing 1
tu %>%
features(Turnover, unitroot_kpss)
## # A tibble: 1 x 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 South Australia Other retailing 7.38 0.01
tu %>%
mutate(diff_close = difference(Turnover)) %>%
features(diff_close, unitroot_kpss)
## # A tibble: 1 x 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 South Australia Other retailing 0.0563 0.1
lambda <- tu %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
tu %>%
autoplot(box_cox(difference(difference(Turnover)),lambda)
)+
labs(title = "Doubly Differenced Monthly Turnover with box cox transformation with lambda .0845")
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
As ϕ1 changes the time series pattern also changes. The smaller the number the less impactful the previous term is on the prediction.
autoplot(sim)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.9*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
y <- numeric(100)
e <- rnorm(100, sd=1)
for(i in 2:100)
y[i] <- 0.6*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
Similar to the model above as theta decreases the ipact that the previous value has on the next value decreases.
y <- numeric(100)
e <- rnorm(100, sd=1)
for(i in 2:100)
y[i] <- -.9*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
y <- numeric(100)
e <- rnorm(100, sd=1)
for(i in 2:100)
y[i] <- .8*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
y <- numeric(100)
e <- rnorm(100, sd=1)
for(i in 2:100)
y[i] <- -.05*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
autoplot(sim)
y <- numeric(100)
e <- rnorm(100, sd=1)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + 0.6*e[i-1]+ e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
y <- ts(numeric(100))
e <- rnorm(100, sd=1)
for(i in 3:100)
y[i] <- -0.8*y[i-1]+0.3*y[i-2] + e[i]
sim2 <- tsibble(idx = seq_len(100), y = y, index = idx)
The ARMA model is appears much more random then the AR(2) model. The ARMA is also stationary while the AR(2) is not. Addiotnally the AR(2) has a distinct pattern where the absolute value of the prediction increases over time. After a short lag the ARMA has very little autocorrelation compared to the AR(2) model.
autoplot(sim)
autoplot(sim2)
acf(sim)
acf(sim2)
ARIMA (0,2,1) was elected. The aCf indicates that the residuals are white noise.
fit<-aus_airpassengers%>%
model(
search = ARIMA(Passengers, stepwise=FALSE))
glance(fit) %>% arrange(AICc)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
r<-fit%>%report()
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
f<-fit%>%forecast(h=10)
f%>%autoplot(aus_airpassengers)+
labs(title = "Forecast of Australian airline passengers")
fit%>% gg_tsresiduals()
\[(1-B)^2y_{t}\]
This model’s AIc is slightly higher than the example in part a so we would stick with the original model.
fit<-aus_airpassengers%>%
model(arima010 = ARIMA(Passengers ~ pdq(0,1,0))
# ,
# arima212 = ARIMA(Passengers ~ pdq(2,1,2)),
# arima021 = ARIMA(Passengers ~ pdq(0,2,1))\
)
glance(fit) %>% arrange(AICc)
## # A tibble: 1 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima010 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
r<-fit%>%report()
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
f<-fit%>%forecast(h=10)
f%>%autoplot(aus_airpassengers)+
labs(title = "Forecast of Australian airline passengers")
fit%>% gg_tsresiduals()
this model has the highest AIc so the least likely choice. Removing the constant causes an error and issues plotting hte model due to NA production.
fit<-aus_airpassengers%>%
model(arima212 = ARIMA(Passengers ~ pdq(2,1,2)+1)
# ,
# arima212 = ARIMA(Passengers ~ pdq(2,1,2)),
# arima021 = ARIMA(Passengers ~ pdq(0,2,1))\
)
#glance(fit) %>% arrange(AICc)
r<-fit%>%report()
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
f<-fit%>%forecast(h=10)
f%>%autoplot(aus_airpassengers)+
labs(title = "Forecast of Australian airline passengers")
fit%>% gg_tsresiduals()
This model has the lowest AIc so should be chosen above the others previously tested.
fit<-aus_airpassengers%>%
model(arima212 = ARIMA(Passengers ~ pdq(0,2,1)+1)
# ,
# arima212 = ARIMA(Passengers ~ pdq(2,1,2)),
# arima021 = ARIMA(Passengers ~ pdq(0,2,1))\
)
#glance(fit) %>% arrange(AICc)
r<-fit%>%report()
## Series: Passengers
## Model: ARIMA(0,2,1) w/ poly
##
## Coefficients:
## ma1 constant
## -1.0000 0.0571
## s.e. 0.0585 0.0213
##
## sigma^2 estimated as 3.855: log likelihood=-95.1
## AIC=196.21 AICc=196.79 BIC=201.63
f<-fit%>%forecast(h=10)
f%>%autoplot(aus_airpassengers)+
labs(title = "Forecast of Australian airline passengers")
fit%>% gg_tsresiduals()
us<-global_economy%>%filter(Country=="United States")
lambda <- us %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
us$GDP<-box_cox(us$GDP, lambda)
fit<-us%>%
model(ARIMA100=ARIMA(GDP~pdq(1,0,0)),
ARIMA210=ARIMA(GDP~pdq(2,1,0)),
ARIMA010=ARIMA(GDP~pdq(0,1,0)),
ARIMA110=ARIMA(GDP~pdq(1,1,0)),
ARIMA011=ARIMA(GDP~pdq(0,1,1)),
ARIMA011c=ARIMA(GDP~pdq(0,1,1)+1),
ARIMA021=ARIMA(GDP~pdq(0,2,1)),
ARIMA021c=ARIMA(GDP~pdq(0,2,1)+1),
search = ARIMA(GDP, stepwise=FALSE))
glance(fit) %>% arrange(AICc)
## # A tibble: 8 x 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States ARIMA021 6020. -323. 650. 650. 654. <cpl [0]> <cpl [1]>
## 2 United States ARIMA021c 6115. -323. 652. 652. 658. <cpl [0]> <cpl [1]>
## 3 United States ARIMA110 5479. -325. 657. 657. 663. <cpl [1]> <cpl [0]>
## 4 United States search 5479. -325. 657. 657. 663. <cpl [1]> <cpl [0]>
## 5 United States ARIMA011 5689. -326. 659. 659. 665. <cpl [0]> <cpl [1]>
## 6 United States ARIMA011c 5689. -326. 659. 659. 665. <cpl [0]> <cpl [1]>
## 7 United States ARIMA210 5580. -325. 659. 659. 667. <cpl [2]> <cpl [0]>
## 8 United States ARIMA010 6774. -332. 668. 668. 672. <cpl [0]> <cpl [0]>
r<-fit%>%report()
f<-fit%>%forecast(h=10)
f
## # A fable: 90 x 5 [1Y]
## # Key: Country, .model [9]
## Country .model Year GDP .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 United States ARIMA100 2018 NA NA
## 2 United States ARIMA100 2019 NA NA
## 3 United States ARIMA100 2020 NA NA
## 4 United States ARIMA100 2021 NA NA
## 5 United States ARIMA100 2022 NA NA
## 6 United States ARIMA100 2023 NA NA
## 7 United States ARIMA100 2024 NA NA
## 8 United States ARIMA100 2025 NA NA
## 9 United States ARIMA100 2026 NA NA
## 10 United States ARIMA100 2027 NA NA
## # … with 80 more rows
The search ended up having a higher AIC then the ARIMA(0,2,1) which had the lowest AIC of 650
The residuals all point to being white noise
fit<-us%>%
model(
ARIMA021=ARIMA(GDP~pdq(0,2,1)))
r<-fit%>%report()
## Series: GDP
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.6683
## s.e. 0.1384
##
## sigma^2 estimated as 6020: log likelihood=-322.92
## AIC=649.84 AICc=650.07 BIC=653.9
f<-fit%>%forecast(h=10)
f%>%autoplot(us)+
labs(title = "Forecast of US GDP")
fit%>% gg_tsresiduals()
The forecast of the fitted model seems reasonable and fits with the trend of previous data.
Whiel the forecast and residuals look to be good the AIC is much higher inidcating ARIMA is more accurate.
us<-global_economy%>%filter(Country=="United States")
fit<-us%>%
model(
ETs=ETS(GDP))
r<-fit%>%report()
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089
f<-fit%>%forecast(h=10)
f%>%autoplot(us)+
labs(title = "Forecast of US GDP")
fit%>% gg_tsresiduals()