Question 1.
As the number of random values increases, the critical values decrease. Because none of the autocorrelations breach the critical values, all of the series appear to be white noise.
The critical values shrink as the number of observations grows because of how they are calculated: they are roughly ±1.96 divided by the square root of the series length. A larger sample also pins the estimated autocorrelations of a truly random series closer to zero, so spurious autocorrelation becomes less likely as the sample size increases.
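To make this concrete, here is a quick sketch of those approximate 95% bounds at a few illustrative series lengths (chosen for illustration, not necessarily the lengths used in the exercise):
# Approximate 95% ACF bounds, +/- 1.96 / sqrt(T), for white noise of length T
n_obs <- c(36, 360, 1000)
round(1.96 / sqrt(n_obs), 3)   # roughly 0.327, 0.103, 0.062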
Question 2.
library(readxl)
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
amazon <- gafa_stock %>%
filter(Symbol == "AMZN") %>%
select(Date, Close)
amazon %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Close)) +
labs(title = "Amazon Stock Price Over Time", y = "Closing Price")
amazon %>% ACF(Close, lag_max = 10) %>% autoplot() +
labs(title = "ACF Plot of Amazon Stock")
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
amazon %>% PACF(Close, lag_max = 10) %>% autoplot() +
labs(title = "PACF Plot of Amazon Stock")
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
Just from the time plot, we can see that the data are not stationary. The ACF shows strong autocorrelation that decays very slowly, because each closing price is much closer to the previous day's price than to values further back. The PACF backs this up: lag 1 contains almost all of the autocorrelation.
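A KPSS unit-root test would back this up (a sketch, not part of the original write-up):
amazon %>%
  features(Close, unitroot_kpss)   # a p-value reported as 0.01 would confirm non-stationarity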
Question 3.
turkey <- global_economy %>%
filter(Country == "Turkey")
turkey %>% autoplot(GDP) +
labs(title = "Turkey GDP")
# Shouldn't need a Box-Cox transformation here
turkey %>% autoplot(difference(GDP)) +
labs(title = "Turkey GDP (First Order Difference)")
## Warning: Removed 1 row(s) containing missing values (geom_path).
turkey %>%
mutate(diff_gdp = difference(GDP)) %>%
features(diff_gdp, unitroot_kpss)
## # A tibble: 1 × 3
## Country kpss_stat kpss_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 0.386 0.0834
accom <- aus_accommodation %>%
filter(State == "Tasmania")
accom %>% autoplot(Takings) +
labs(title = "Tasmanian Accomodations Takings")
# Need a log transformation to stabilise the variance
accom %>% autoplot(log(Takings)) +
labs(title = "Tasmanian Accomodations Takings")
accom %>% autoplot(difference(log(Takings), 4)) +
  labs(title = "Tasmanian Accommodation Takings (log, seasonally differenced)")
## Warning: Removed 4 row(s) containing missing values (geom_path).
accom %>%
mutate(diff_takings = difference(log(Takings), 4)) %>%
features(diff_takings, unitroot_kpss)
## # A tibble: 1 × 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 0.199 0.1
souvenirs %>% autoplot() +
labs(title = "Monthly Souvenirs Sales")
## Plot variable not specified, automatically selected `.vars = Sales`
souvenirs_guerro <- souvenirs %>%
features(Sales, features = guerrero)
souvenirsbc <- souvenirs %>%
  mutate(Sales = box_cox(Sales, souvenirs_guerro$lambda_guerrero))
# I just used a simple Box-Cox transformation (with the Guerrero-selected lambda) for this one
souvenirs %>%
mutate(diff_sales = difference(difference(Sales, 12), 1)) %>%
features(diff_sales, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.111 0.1
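As a cross-check on the seasonal-plus-first differencing used above, the unit-root features can suggest how many differences are needed (a sketch on the Box-Cox-transformed sales, not run in the original):
souvenirsbc %>%
  features(Sales, unitroot_nsdiffs)             # likely suggests one seasonal difference
souvenirsbc %>%
  mutate(diff_sales = difference(Sales, 12)) %>%
  features(diff_sales, unitroot_ndiffs)         # likely suggests one further first difference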
Question 4.
# Simulate an AR(1) process: y_t = phi * y_{t-1} + e_t, starting from y_1 = 0
ar1 <- function(phi, n = 100) {
  y <- numeric(n)
  e <- rnorm(n)
  for (i in 2:n) {
    y[i] <- phi * y[i - 1] + e[i]
  }
  tsibble(idx = seq_len(n), y = y, index = idx)
}
ar1(0.5)
## # A tsibble: 100 x 2 [1]
## idx y
## <int> <dbl>
## 1 1 0
## 2 2 -1.35
## 3 3 0.318
## 4 4 -0.709
## 5 5 1.42
## 6 6 0.275
## 7 7 1.15
## 8 8 1.12
## 9 9 2.03
## 10 10 1.05
## # … with 90 more rows
ar1(0) %>%
autoplot() +
labs(title = "AR1(0)")
## Plot variable not specified, automatically selected `.vars = y`
ar1(-1) %>%
autoplot() +
labs(title = "AR1(-1)")
## Plot variable not specified, automatically selected `.vars = y`
ar1(1) %>%
autoplot() +
labs(title = "AR1(1)")
## Plot variable not specified, automatically selected `.vars = y`
#b. With phi = 0.6 the series is stationary; with phi = 1 it becomes a random walk; and with phi = -1 the series oscillates from observation to observation (strong negative serial correlation) and is also non-stationary.
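# As a quick check of that claim (a sketch, not part of the original run), the ACFs of two simulated
# series make the difference visible: the stationary series' ACF dies out quickly, while the random
# walk's ACF decays very slowly.
ar1(0.6) %>% ACF(y) %>% autoplot() + labs(title = "ACF of AR(1) with phi = 0.6")
ar1(1) %>% ACF(y) %>% autoplot() + labs(title = "ACF of AR(1) with phi = 1 (random walk)")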
# Simulate an MA(1) process: y_t = e_t + theta * e_{t-1}
ma1 <- function(theta = 0.6)
{
  set.seed(123)    # make the simulated errors reproducible
  y <- ts(numeric(100))
  e <- rnorm(100)  # a single white-noise series supplies both e_t and e_{t-1}
  for (i in 2:100)
    y[i] <- e[i] + theta * e[i - 1]
  return(y)
}
ma1(0.6) %>%
as_tsibble() %>%
autoplot() +
labs(title = "MA1(0.6)")
## Plot variable not specified, automatically selected `.vars = value`
ma1(0) %>%
as_tsibble() %>%
autoplot() +
labs(title = "MA1(0)")
## Plot variable not specified, automatically selected `.vars = value`
ma1(1) %>%
as_tsibble() %>%
autoplot() +
labs(title = "MA1(1)")
## Plot variable not specified, automatically selected `.vars = value`
ma1(-1) %>%
as_tsibble() %>%
autoplot() +
labs(title = "MA1(-1)")
## Plot variable not specified, automatically selected `.vars = value`
#c. All of the simulated MA(1) series look roughly like white noise to me in the time plots.
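# A quick way to probe that impression (a sketch, not in the original run): the ACF of an MA(1)
# series should show a single spike at lag 1 (theoretical value about 0.44 when theta = 0.6) and
# nothing significant beyond it.
ma1(0.6) %>% as_tsibble() %>% ACF(value) %>% autoplot() + labs(title = "ACF of MA(1) with theta = 0.6")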
# Simulate an ARMA(1,1) process: y_t = phi * y_{t-1} + e_t + theta * e_{t-1}
arma <- function(phi = 0.6, theta = 0.6)
{
  set.seed(123)    # make the simulated errors reproducible
  y <- ts(numeric(100))
  e <- rnorm(100)  # the same error series supplies both e_t and e_{t-1}
  for (i in 2:100)
    y[i] <- phi * y[i - 1] + e[i] + theta * e[i - 1]
  return(y)
}
arma() %>%
as_tsibble() %>%
autoplot() +
labs(title = "ARMA(1,1)")
## Plot variable not specified, automatically selected `.vars = value`
# Simulate an AR(2) process: y_t = phi1 * y_{t-1} + phi2 * y_{t-2} + e_t
ar2 <- function(phi1 = -0.8, phi2 = 0.3)
{
  set.seed(123)    # make the simulated errors reproducible
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 3:100)
    y[i] <- phi1 * y[i - 1] + phi2 * y[i - 2] + e[i]
  return(y)
}
ar2() %>%
as_tsibble() %>%
autoplot() +
labs(title = "AR(2)")
## Plot variable not specified, automatically selected `.vars = value`
#g. The ARMA(1,1) series is stationary, but the AR(2) series is not: with these parameters it oscillates with growing amplitude. We could forecast the ARMA(1,1) data sensibly, but not the AR(2) data.
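# A quick numerical check of that claim (a sketch, not in the original run): an AR model is
# stationary only when every root of its AR polynomial lies outside the unit circle.
Mod(polyroot(c(1, -0.6)))       # ARMA(1,1) AR part, phi = 0.6: root ~ 1.67 > 1, so stationary
Mod(polyroot(c(1, 0.8, -0.3)))  # AR(2), phi1 = -0.8, phi2 = 0.3: one root ~ 0.93 < 1, so non-stationary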
Question 5.
aus_airpassengers %>%
filter(Year < 2012)
## # A tsibble: 42 x 2 [1Y]
## Year Passengers
## <dbl> <dbl>
## 1 1970 7.32
## 2 1971 7.33
## 3 1972 7.80
## 4 1973 9.38
## 5 1974 10.7
## 6 1975 11.1
## 7 1976 10.9
## 8 1977 11.3
## 9 1978 12.1
## 10 1979 13.0
## # … with 32 more rows
airpassengers <- aus_airpassengers %>%
model(
'Auto' = ARIMA(Passengers) )
airpassengers %>%
gg_tsresiduals()
augment(airpassengers) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Auto 6.70 0.754
airpassengers %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers)
arm_mod <- aus_airpassengers %>%
model(
ARIMA(Passengers ~ pdq(0,1,0)))
arm_mod %>%
gg_tsresiduals()
augment(arm_mod) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Passengers ~ pdq(0, 1, 0)) 6.77 0.747
arm_mod %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers)
#c. Part a's forecast has a steeper trend.
arm_212 <- aus_airpassengers %>%
model(
ARIMA(Passengers ~ pdq(0,2,1)))
arm_212 %>%
gg_tsresiduals()
augment(arm_212) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Passengers ~ pdq(0, 2, 1)) 6.70 0.754
arm_212 %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers)
#d. Part d's forecast looks very similar to part a's.
airpassengers_constant <- aus_airpassengers %>%
model(
ARIMA(Passengers ~ 1 + pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
airpassengers_constant %>%
gg_tsresiduals()
augment(airpassengers_constant) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Passengers ~ 1 + pdq(0, 2, 1)) 6.91 0.734
airpassengers_constant %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers)
# The prediction intervals with the constant included are a lot narrower.
Question 6.
us_gdp <- global_economy %>%
filter(Code == "USA") %>%
mutate(GDP = GDP / 1e12) %>%   # rescale GDP to trillions of dollars
select(Year, GDP)
us_gdp %>%
ggplot(aes(x = Year, y = GDP)) +
geom_line(aes(y = GDP)) +
labs(title = "US GDP Since 1960", y = "GDP in Trillions")
us_gdp_guerro <- us_gdp %>%
features(GDP, features = guerrero)
us_gdp_boxCox <- us_gdp %>%
  mutate(GDP = box_cox(GDP, us_gdp_guerro$lambda_guerrero))
us_gdp_boxCox %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`
# No need for a Box-Cox transformation here
us_gdp %>%
gg_tsdisplay(GDP, plot_type = 'partial')
us_gdp %>%
features(GDP, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 2
us_gdp_auto <- us_gdp %>%
model(
'Auto' = ARIMA(GDP))
us_gdp_auto %>%
report()
## Series: GDP
## Model: ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.4206 -0.3048
## s.e. 0.1197 0.1078
##
## sigma^2 estimated as 0.02615: log likelihood=23.26
## AIC=-40.52 AICc=-40.06 BIC=-34.45
# ARIMA() automatically chose an ARIMA(0,2,2) model.
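# Written out with the backshift operator B and the reported coefficients, that model is
#   (1 - B)^2 GDP_t = (1 - 0.42 B - 0.30 B^2) e_t,
# i.e. the twice-differenced GDP series follows an MA(2) process.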
us_compare <- us_gdp %>%
model(
'Auto' = ARIMA(GDP),
'Diffs Only' = ARIMA(GDP ~ pdq(0,2,0)),
'Autoregression + Diff' = ARIMA(GDP ~ pdq(1,2,0)),
"Moving Average + Diff" = ARIMA(GDP ~ pdq(0,2,1)),
"AR + Diff + MA" = ARIMA(GDP ~ pdq(1,2,1))
)
us_compare %>%
report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 Auto 0.0261 23.3 -40.5 -40.1 -34.4 <cpl [0]> <cpl [2]>
## 2 Diffs Only 0.0310 17.8 -33.5 -33.5 -31.5 <cpl [0]> <cpl [0]>
## 3 Autoregression + Diff 0.0306 18.6 -33.3 -33.0 -29.2 <cpl [1]> <cpl [0]>
## 4 Moving Average + Diff 0.0292 19.8 -35.6 -35.3 -31.5 <cpl [0]> <cpl [1]>
## 5 AR + Diff + MA 0.0263 23.1 -40.2 -39.7 -34.1 <cpl [1]> <cpl [1]>
# The ARIMA(1,2,1) model adds an autoregressive component and its AICc is very close to the automatically selected model's, so I am going to choose that one.
us_gdp_total <- us_gdp%>%
model(
"AR + Diff + MA" = ARIMA(GDP ~ pdq(1,2,1))
)
us_gdp_auto %>%
gg_tsresiduals()
us_gdp_total %>%
gg_tsresiduals()
augment(us_gdp_auto) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Auto 12.2 0.273
augment(us_gdp_total) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 AR + Diff + MA 14.1 0.169
us_gdp_total %>%
forecast(h = 10) %>%
autoplot(us_gdp)
#looks reasonable to me!
us_ets <- us_gdp %>%
model('Auto' = ETS(GDP),
'Multiplicative Trend' = ETS(GDP ~ error("A") + trend("M") + season("N")))
us_ets %>%
report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Auto 0.000455 23.8 -37.7 -36.5 -27.4 0.0280 0.128 0.0169
## 2 Multiplicative Trend 0.0318 -15.7 41.4 42.6 51.7 0.0296 0.137 0.0983
us_ets %>%
forecast(h = 10) %>%
autoplot(us_gdp)
# I personally prefer the ARIMA model because its forecasts seem more precise.
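# One way to back that preference up (a sketch, analogous to the cross-validation in Question 8;
# not run in the original) would be to compare one-step forecast accuracy of the two approaches:
us_gdp %>%
  stretch_tsibble(.init = 20) %>%
  model(
    arima = ARIMA(GDP ~ pdq(1,2,1)),
    ets = ETS(GDP)
  ) %>%
  forecast(h = 1) %>%
  accuracy(us_gdp)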
Question 7.
us_arrivals <- aus_arrivals %>%
filter(Origin == "US")
us_arrivals
## # A tsibble: 127 x 3 [1Q]
## # Key: Origin [1]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 US 32316
## 2 1981 Q2 US 23721
## 3 1981 Q3 US 24533
## 4 1981 Q4 US 33438
## 5 1982 Q1 US 33527
## 6 1982 Q2 US 28366
## 7 1982 Q3 US 30856
## 8 1982 Q4 US 33293
## 9 1983 Q1 US 32472
## 10 1983 Q2 US 32369
## # … with 117 more rows
autoplot(us_arrivals)
## Plot variable not specified, automatically selected `.vars = Arrivals`
# Arrivals appear to increase at a relatively constant rate over time.
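# A seasonal plot would make the quarterly pattern easier to see (a sketch, not part of the
# original output):
us_arrivals %>%
  gg_season(Arrivals, labels = "both") +
  labs(title = "Seasonal Plot: US Arrivals to Australia")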
us_arrivalsdiff <- us_arrivals %>%
gg_tsdisplay(difference(Arrivals, 4) %>%
difference(), plot_type = "partial", lag = 16)
us_arrivalsdiff
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).
#c. Based on the ACF, I would say there is still seasonality in the differenced series.
#d. The PACF shows that significant autocorrelation remains even after the differencing removes much of the pattern seen in the ACF.
# Both plots suggest a model with a seasonal component.
us_arrivalsfit <- us_arrivals %>%
model(ARIMA(Arrivals))
report(us_arrivalsfit)
## Series: Arrivals
## Model: ARIMA(3,0,3)(0,1,0)[4] w/ drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 constant
## -0.2265 -0.4655 0.0961 0.7863 0.9472 0.7735 4414.483
## s.e. 0.1234 0.0925 0.1255 0.0943 0.0551 0.1086 2270.019
##
## sigma^2 estimated as 55384491: log likelihood=-1269.65
## AIC=2555.3 AICc=2556.56 BIC=2577.8
us_arrivalsarima <- us_arrivals %>%
model(ARIMA(Arrivals ~ pdq(3,0,3) + PDQ(0,1,0)))
report(us_arrivalsarima)
## Series: Arrivals
## Model: ARIMA(3,0,3)(0,1,0)[4] w/ drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 constant
## -0.2265 -0.4655 0.0961 0.7863 0.9472 0.7735 4414.483
## s.e. 0.1234 0.0925 0.1255 0.0943 0.0551 0.1086 2270.019
##
## sigma^2 estimated as 55384491: log likelihood=-1269.65
## AIC=2555.3 AICc=2556.56 BIC=2577.8
us_arrivalsarima2 <- us_arrivals %>%
model(ARIMA(Arrivals ~ pdq(0,3,0) + PDQ(0,1,0)))
## Warning: Having 3 or more differencing operations is not recommended. Please
## consider reducing the total number of differences.
## Warning: It looks like you're trying to fully specify your ARIMA model but have not said if a constant should be included.
## You can include a constant using `ARIMA(y~1)` to the formula or exclude it by adding `ARIMA(y~0)`.
## Warning: 1 error encountered for ARIMA(Arrivals ~ pdq(0, 3, 0) + PDQ(0, 1, 0))
## [1] There are no ARIMA models to choose from after imposing the `order_constraint`, please consider allowing more models.
report(us_arrivalsarima2)
## Series: Arrivals
## Model: NULL model
## NULL model
Question 8.
gdp_data <- read_excel("C:/Users/ryanf/Downloads/gdp_data.xlsx")
gdp <- gdp_data %>%
mutate(Date = year(Date)) %>%
as_tsibble(index = Date)
gdp %>% autoplot() + labs(title = "US GDP")
## Plot variable not specified, automatically selected `.vars = Value`
# Based on the graph, I think an ARIMA model will work well here
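# To choose an ARIMA order by hand rather than relying on ARIMA()'s automatic search, a display of
# the differenced series would help (a sketch, not run in the original):
gdp %>%
  gg_tsdisplay(difference(Value), plot_type = "partial")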
gdp %>% model(ARIMA(Value)) %>% report()
## Series: Value
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.7866 145.2454
## s.e. 0.1550 22.0977
##
## sigma^2 estimated as 13855: log likelihood=-135.55
## AIC=277.09 AICc=278.43 BIC=280.37
arima_gdp <- gdp %>%
model(arima110 = ARIMA(Value ~ pdq(1,1,0)))
arima_gdp %>%
gg_tsresiduals()
#the residuals resemble white noise
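# A Ljung-Box test gives a more formal check on these residuals (a sketch, not run in the original;
# using dof = 2 for the AR and drift coefficients is my assumption):
augment(arima_gdp) %>%
  features(.resid, ljung_box, lag = 10, dof = 2)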
arima_gdp %>%
forecast(h = 4) %>%
autoplot(gdp) +
labs(title = "Four Year Forecast ")
gdp %>% model(ETS(Value)) %>% report()
## Series: Value
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.9998999
##
## Initial states:
## l[0] b[0]
## 9224.503 599.5141
##
## sigma^2: 1e-04
##
## AIC AICc BIC
## 297.9423 301.4717 303.6198
gdp_haha <- gdp %>%
model(
haha = ETS(Value ~ error("M") + trend("A") + season("N")))
gdp_aan <- gdp %>%
model(
AAN = ETS(Value ~ error("A") + trend("A") + season("N")))
gdp_haha %>%
gg_tsresiduals()
gdp_aan %>%
gg_tsresiduals()
# Both sets of residuals resemble white noise
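# The same formal residual check can be applied to both ETS models (a sketch, following the
# Ljung-Box style used earlier in this assignment):
augment(gdp_haha) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
augment(gdp_aan) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)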
gdp_aan %>%
forecast(h=4) %>%
autoplot(gdp) +
labs(title = "Four Year Forecast of US GDP")
gdp_haha %>%
forecast(h=4) %>%
autoplot(gdp) +
labs(title = "Four Year Forecast of US GDP")
gdp %>%
stretch_tsibble(.init = 10) %>%
model(
haha = ETS(Value ~ error("M") + trend("A") + season("N")),
AAN = ETS(Value ~ error("A") + trend("A") + season("N")),
arima110 = ARIMA(Value ~ pdq(1,1,0))) %>%
forecast(h = 1) %>%
accuracy(gdp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2023
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAN Test 7.53 178. 138. -0.0455 0.760 0.219 0.274 0.333
## 2 arima110 Test 69.0 154. 128. 0.318 0.678 0.204 0.238 -0.116
## 3 haha Test 15.6 180. 144. -0.0173 0.774 0.229 0.278 0.457
# Based on the cross-validation accuracy (the ARIMA model has the lowest RMSE, MAE, and MASE), I choose the ARIMA model