###################Chapter 8 Exponential Smoothing
#1.Consider the number of pigs slaughtered in Victoria, available in the
#aus_livestock dataset.
View(aus_livestock)
pigs <- aus_livestock %>%
filter(Animal == 'Pigs' & State == 'Victoria')
View(pigs)
#a.Use the ETS() function to estimate the equivalent model for simple
#exponential smoothing. Find the optimal values of α and ℓ0,
#and generate forecasts for the next four months.
ets <- pigs %>%
model(ses = ETS(Count ~ error('A') + trend('N') + season('N')))
optimal_values <- ets %>% report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
#alpha=0.3221247, ℓ0=100646.6
pig_ses<-ets %>% forecast(h = 4)
Forecast <- ets %>%
forecast(h = 4) %>%
autoplot(filter(pigs, Month >= yearmonth('2017 Jan'))) +
labs(title = 'Four Month Forecast Data')
Forecast

#b.Compute a 95% prediction interval for the first forecast using ^y±1.96s where s
#is the standard deviation of the residuals. Compare your interval with the interval produced by R.
yhat<-pig_ses%>%
pull(Count)%>%
head(1)
standardDeviation <- augment(ets) %>%
pull(.resid) %>%
sd()
lower <- yhat - 1.96 * standardDeviation
upper <- yhat + 1.96 * standardDeviation
lower
## <distribution[1]>
## [1] N(76871, 8.7e+07)
upper
## <distribution[1]>
## [1] N(113502, 8.7e+07)
#The 95% prediction interval for the first forecast is from 76871 to 113502.
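#For comparison, R's own 95% interval can be read straight off the fable with hilo()
#(a fabletools helper; a minimal sketch):
pig_ses %>% hilo(level = 95)
#The first row's 95% interval should closely match the manual 76871 to 113502 above.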
#2.Write your own function to implement simple exponential smoothing.
#The function should take arguments y (the time series), alpha (the smoothing parameter)
#and level (the initial levelℓ0).
#It should return the forecast of the next observation in the series.
#Does it give the same forecast as ETS()?
SES <- function(y, alpha, l0){
yhat <- l0
for(index in 1:length(y)){
yhat <- alpha*y[index] + (1 - alpha)*yhat
}
yhat #return the forecast of the next observation
}
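#A quick check of the hand-rolled function against ETS(), using the alpha and l0
#estimated in Question 1 (values taken from the report() output above):
SES(pigs$Count, alpha = 0.3221247, l0 = 100646.6)
#This should come out very close to the first ETS() forecast mean (about 95187).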
#3.Modify your function from the previous exercise to return the sum of squared
#errors rather than the forecast of the next observation. Then use the optim()
#function to find the optimal values ofα and ℓ0.
#Do you get the same values as the ETS() function?
vic_pig <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria") %>%
pull(Count)
ses2 <- function(par, y)
{
alpha <- par[1]
level <- par[2]
n <- length(y)
yhat <- numeric(n)
yhat[1] <- level
for(i in 2:n)
{
yhat[i] <- alpha * y[i-1] + (1 - alpha) * yhat[i-1]
}
return(sum((y - yhat)^2) %>%
round(3)
)
}
optParam <- optim(c(0.00001, vic_pig[1]), ses2, y=vic_pig)$par
optParam
## [1] 3.236967e-01 9.422438e+04
#α=3.236967e-01 and ℓ0=9.422438e+04
#4.Combine your previous two functions to produce a function that both
#finds the optimal values of α and ℓ0, and produces a forecast of the next observation in the series.
SES <- function(init_pars, data){
fc_next <- 0
SSE <- function(pars, data){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(data)){
error <- data[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*data[index] + (1 - alpha)*y_hat
}
fc_next <<- y_hat
return(SSE)
}
optim_pars <- optim(par = init_pars, data = data, fn = SSE)
return(list(
Next_observation_forecast = fc_next,
alpha = optim_pars$par[1],
l0 = optim_pars$par[2]
))
}
SES(c(0.5, vic_pig[1]), vic_pig)
## $Next_observation_forecast
## [1] 95186.55
##
## $alpha
## [1] 0.3221274
##
## $l0
## [1] 99223.08
#α=0.3221274 and ℓ0=99223.08
#5.Data set global_economy contains the annual Exports from many countries.
#Select one country to analyse.
View(global_economy)
vietnam<-global_economy%>%
filter(Country=='Vietnam')
View(vietnam)
vietnam_train<- vietnam %>%
filter(Year<="2012")
View(vietnam_train)
#a.Plot the Exports series and discuss the main features of the data.
vietnam %>%
autoplot(Exports)+
labs(y="Exports",title="Vietnam Exports")
## Warning: Removed 26 row(s) containing missing values (geom_path).

#b.Use an ETS(A,N,N) model to forecast the series, and plot the forecasts
est_ann<- vietnam_train$Exports %>%
ets(model="ANN") %>%
forecast(h=5)
## Warning in ets(., model = "ANN"): Missing values encountered. Using longest
## contiguous portion of time series
est_ann
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 54 80.02855 71.81060 88.24651 67.46028 92.59683
## 55 80.02855 68.40719 91.64991 62.25522 97.80189
## 56 80.02855 65.79559 94.26152 58.26112 101.79599
## 57 80.02855 63.59388 96.46323 54.89389 105.16322
## 58 80.02855 61.65412 98.40299 51.92729 108.12982
est_ann %>% autoplot()

#c.Compute the RMSE values for the training data
sqrt(mean(est_ann$residuals^2))
## [1] 6.170434
#RMSE=6.170434
#d.Compare the results to those from an ETS(A,A,N) model. Discuss the merits of
#the two forecasting methods for this data set
est_aan<- vietnam_train$Exports %>%
ets(model="AAN") %>%
forecast(h=5)
## Warning in ets(., model = "AAN"): Missing values encountered. Using longest
## contiguous portion of time series
est_aan
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 54 79.56347 72.73426 86.39269 69.11909 90.00786
## 55 81.34833 74.51911 88.17755 70.90394 91.79272
## 56 83.08652 76.25730 89.91574 72.64213 93.53091
## 57 84.77926 77.95004 91.60848 74.33487 95.22365
## 58 86.42774 79.59852 93.25696 75.98335 96.87213
est_aan %>% autoplot()

#RMSE value for the ETS(A,A,N) model
sqrt(mean(est_aan$residuals^2))
#e.Compare the forecasts from both methods. Which do you think is best?
#The ETS(A,A,N) forecasts follow the upward trend of the exports series, while the
#ETS(A,N,N) forecasts are flat at the last level. Hence, I think AAN is better for this data.
#f.Calculate a 95% prediction interval for the first forecast for each model, using
#the RMSE values and assuming normal errors. Compare your intervals with those
#produced using R
#ANN
s1 <- sd(est_ann$residuals)
est_ann$mean[1] - 1.96 * s1 #lower 95%=68.96548
## [1] 68.96548
est_ann$mean[1] + 1.96 * s1 #upper 95%=91.09163
## [1] 91.09163
#AAN
s2 <- sd(est_aan$residuals)
est_aan$mean[1] - 1.96 * s2 #lower 95%=69.9559
## [1] 69.9559
est_aan$mean[1] + 1.96 * s2 #upper 95%=89.17105
## [1] 89.17105
#6.Forecast the Chinese GDP from the global_economy data set using an ETS model.
#Experiment with the various options in the ETS() function to see how much the
#forecasts change with damped trend, or with a Box-Cox transformation.
#Try to develop an intuition of what each is doing to the forecasts.
china<-global_economy %>%
filter(Country == 'China')
china %>%
autoplot(GDP) +
labs(y = "GDP", title = "Chinese GDP")

#generate forecasts for the next 5 years
china_holt <- china$GDP %>%
holt(h = 5)
china_holt %>% autoplot()
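#A sketch of the experiments the exercise asks for (fable syntax, consistent with the rest
#of this script): a plain ETS, a damped-trend ETS, and an ETS fitted to Box-Cox transformed
#GDP, with lambda chosen by the guerrero feature.
lambda_china <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
china %>%
  model(
    ets        = ETS(GDP),
    ets_damped = ETS(GDP ~ trend("Ad")),
    ets_boxcox = ETS(box_cox(GDP, lambda_china))
  ) %>%
  forecast(h = "20 years") %>%
  autoplot(china, level = NULL) +
  labs(title = "Chinese GDP: ETS, damped ETS and Box-Cox ETS forecasts")
#Typically the damped trend flattens the long-run growth, while the (log-like) Box-Cox
#transformation lets the trend grow multiplicatively on the original scale.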

#7.Find an ETS model for the Gas data from aus_production and forecast the next few years.
#Why is multiplicative seasonality necessary here? Experiment with making the trend damped.
#Does it improve the forecasts?
aus_production %>%
autoplot(Gas)

ts <- ts(data=aus_production$Gas, frequency=4)
gas <- ets(ts, model="MAM")
#forecast the next 5 years
fc_gas <- gas %>% forecast(h=20)
fc_gas %>% autoplot()

gas
## ETS(M,A,M)
##
## Call:
## ets(y = ts, model = "MAM")
##
## Smoothing parameters:
## alpha = 0.6529
## beta = 0.1442
## gamma = 0.0978
##
## Initial states:
## l = 5.9456
## b = 0.0706
## s = 0.9309 1.1779 1.0749 0.8163
##
## sigma: 0.057
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
#alpha=0.6529 beta=0.1442
#AIC:1680.929 AICc:1681.794 BIC:1711.389
#Multiplicative seasonality is necessary because the seasonal variation increases in
#proportion to the level of the series.
#trend damped
gas_damped <- ets(ts, model="MAM", damped=TRUE)
fc_gas_damped <- gas_damped %>% forecast(h=20)
fc_gas_damped %>% autoplot()

gas_damped
## ETS(M,Ad,M)
##
## Call:
## ets(y = ts, model = "MAM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.6489
## beta = 0.1551
## gamma = 0.0937
## phi = 0.98
##
## Initial states:
## l = 5.8589
## b = 0.0994
## s = 0.9282 1.1779 1.0768 0.8171
##
## sigma: 0.0573
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
#alpha=0.6489 beta=0.1551
#AIC:1684.028 AICc:1685.091 BIC:1717.873
#Comparing AICc and BIC (both slightly higher for the damped model), we cannot conclude that
#damping the trend improves the model.
#8.Recall your retail time series data
#a.Why is multiplicative seasonality necessary for this series?
set.seed(333)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
ts <- ts(data=myseries$Turnover, frequency=12)
ts%>% autoplot()

retail_hw <- ets(ts, model="MAM")
#forecast the next 5 years
fc_retail_hw <- retail_hw %>% forecast(h=60)
fc_retail_hw %>% autoplot()

#b.Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
retail_hw_damped <- ets(ts, model="MAM", damped=TRUE)
fc_retail_hw_damped <- retail_hw_damped %>% forecast(h=60)
fc_retail_hw_damped %>% autoplot()

#c.Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
sqrt(mean(fc_retail_hw$residuals^2))
## [1] 0.09928151
#RMSE=0.09928151
sqrt(mean(fc_retail_hw_damped$residuals^2))
## [1] 0.1008364
#RMSE=0.1008364
#The non-damped model is preferred since it has a lower RMSE.
#d.Check that the residuals from the best method look like white noise.
checkresiduals(retail_hw)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,M)
## Q* = 81.106, df = 8, p-value = 2.931e-14
##
## Model df: 16. Total lags used: 24
#The Ljung-Box test strongly rejects (p-value = 2.9e-14), so the residuals do not look like
#white noise: significant autocorrelation remains.
#e.Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
train <- myseries %>%
filter(year(Month) <= 2010)
view(train)
test <- myseries %>%
filter(year(Month) > 2010)
ts_train <- ts(data=train$Turnover, frequency=12)
ts_test <- ts(data=test$Turnover, frequency=12)
retail_hw_train <- ets(ts_train, model="MAM")
sqrt(mean(retail_hw_train$residuals^2))
## [1] 0.09307003
#RMSE=0.09307003
retail_hw_test <- ets(ts_test, model="MAM")
sqrt(mean(retail_hw_test$residuals^2))
## [1] 0.08166493
#RMSE=0.08166493
fc <- retail_hw_train %>%
forecast(new_data = anti_join(ts, ts_train))
retail_hw_train %>% accuracy()
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03733977 0.6889633 0.5020404 -0.9923539 7.02562 0.4540452
## ACF1
## Training set 0.035107
fc %>% accuracy(ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03733977 0.6889633 0.5020404 -0.9923539 7.02562 0.4540452
## Test set -3.82432232 3.9775350 3.8243223 -74.3310267 74.33103 3.4587163
## ACF1 Theil's U
## Training set 0.0351070 NA
## Test set 0.1770195 2.424308
#Based on these results, accuracy on the training set is much better than on the test set.
#Even so, the Holt-Winters model appears to outperform the seasonal naïve model from
#Exercise 7 in Section 5.11.
#9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series,
#followed by ETS on the seasonally adjusted data.
#How does that compare with your best previous forecasts on the test set?
ts_stl <- stl(ts_train, s.window='periodic')
ts_stl %>% autoplot()

ts_bc_ets<- ts_train %>%
ets(lambda = BoxCox.lambda(ts_train)) %>%
forecast(h=10)
ts_bc_ets %>% autoplot()

fc <- ts_bc_ets %>%
forecast(new_data = anti_join(ts, ts_train))
ts_bc_ets %>% accuracy()
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03998483 0.6803232 0.4939701 0.1571171 6.864127 0.4467464
## ACF1
## Training set 0.03852586
#Comparing training RMSE (0.680 here vs 0.689 for the earlier ETS(M,A,M)), the Box-Cox ETS
#fit is slightly better than the previous model.
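#What the exercise literally asks for can also be done in the forecast-package workflow used
#above: STL on the Box-Cox transformed training series, ETS on the seasonally adjusted data,
#then forecasts over the test period (a sketch; stlm() handles the STL + ETS combination).
stl_ets_fc <- stlm(ts_train, s.window = "periodic",
                   lambda = BoxCox.lambda(ts_train),
                   method = "ets") %>%
  forecast(h = length(ts_test))
accuracy(stl_ets_fc, test$Turnover)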
#10.Compute the total domestic overnight trips across Australia from the tourism dataset.
View(tourism)
#a.Plot the data and describe the main features of the series.
aus_trip<-tourism %>%
summarise(Trips = sum(Trips))
aus_trip %>% autoplot(Trips) +
labs(title = "Australian Overnight Trips")

#The plot shows a decreasing trend from about 2000 to 2010, after which the series rises
#again; there is also clear quarterly seasonality.
#b.Decompose the series using STL and obtain the seasonally adjusted data.
aus_trip_stl <- aus_trip %>%
model(STL(Trips)) %>%
components()
aus_trip_stl %>% autoplot()

aus_trip_stl %>% as_tsibble() %>%
autoplot(season_adjust) +
labs(title = "Australian Overnight Trips (STL)")

#c.Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be specified using decomposition_model().)
aus_trip %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))) %>%
forecast(h = "2 years") %>% autoplot(aus_trip) +
labs(title = "Australian Overnight Trips with Additive Damped Trend Forecast")

#d.Forecast the next two years of the series using an appropriate model for Holt’s linear method applied to the seasonally adjusted data (as before but without damped trend).
aus_trip %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("A") + season("N")))) %>%
forecast(h = "2 years") %>%
autoplot(aus_trip) +
labs(title = "Australian Overnight Trips with Holt's Linear Method Forecast")

#e.Now use ETS() to choose a seasonal model for the data.
aus_trip %>%
model(ETS(Trips)) %>%
forecast(h = "2 years") %>%
autoplot(aus_trip) +
labs(title = "Australian Overnight Trips (ETS)")

#f.Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
compare_stl <- aus_trip %>%
model(
stl_AAdn = decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))),
stl_AAN = decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") +
trend("A") +
season("N"))),
ETS = ETS(Trips)
)
accuracy(compare_stl)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl_AAdn Training 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
## 2 stl_AAN Training 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
## 3 ETS Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
#Comparing ME and RMSE, stl_AAN gives slightly better in-sample fits, and both STL-based
#models beat the plain ETS.
#g.Compare the forecasts from the three approaches? Which seems most reasonable?
compare_stl %>%
forecast(h = "2 years") %>%
autoplot(aus_trip, level = NULL) +
labs(title = "Australian Overnight Trips: Comparing Forecasts") +
guides(colour=guide_legend(title="Types of Forecast"))

#Based on the plot, the stl_AAN forecasts sit between the ETS and stl_AAdn forecasts,
#so stl_AAN seems the most reasonable choice.
#h.Check the residuals of your preferred model.
resid_aan <- compare_stl %>%
select(stl_AAN)
resid_aan %>%gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

#11.For this exercise use the quarterly number of arrivals to Australia from New Zealand, 1981 Q1 – 2012 Q3, from data set aus_arrivals.
View(aus_arrivals)
nz<-aus_arrivals%>%
filter(Origin=='NZ')
View(nz)
#a.Make a time plot of your data and describe the main features of the series.
autoplot(nz,Arrivals)+
labs(title = "New Zealand Arrivals in Australia")

#b.Create a training set that withholds the last two years of available data. Forecast the test set using an appropriate model for Holt-Winters’ multiplicative method.
train<-nz %>%
filter(Quarter<yearquarter('2011 Q1'))
View(train)
train %>%
model(
ETS(Arrivals ~ error("M") +
trend("A") +
season("M"))) %>%
forecast(h = 8) %>%
autoplot(level=NULL,colour = "red") +
autolayer(nz, Arrivals) +
labs(title = "Train vs. Actual New Zealand Arrivals in Australia")

#c.Why is multiplicative seasonality necessary here?
#Because the seasonal variation increases with the level of the series, so additive
#seasonality would not capture it.
#d.Forecast the two-year test set using each of the following methods:
##an ETS model;
##an additive ETS model applied to a log transformed series;
##a seasonal naïve method;
##an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
compare_models <- anti_join(nz,train,
by = c("Quarter",
"Origin",
"Arrivals"))
arrivals_models <- train %>%
model(
ETS_mod = ETS(Arrivals),
Log_mod = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
SNAIVE_mod = SNAIVE(Arrivals),
STL_decomp_log = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust)))
arrivals_f <- arrivals_models %>% forecast(h = 8)
arrivals_f %>% autoplot(level = NULL) +
autolayer(compare_models, Arrivals) +
labs(title = "Train vs. Actual New Zealand Arrivals in Australia") +
guides(colour = guide_legend(title = "Forecast"))

#e.Which method gives the best forecasts? Does it pass the residual tests?
#ETS_mod gives the best forecasts.
aus_optimal <- arrivals_models %>% select('ETS_mod')
aus_optimal %>% gg_tsresiduals()

#f.Compare the same four methods using time series cross-validation instead of using a training and test set. Do you come to the same conclusions?
arrivals_models_crossval <- nz %>%
slice(1:(n()-3)) %>%
stretch_tsibble(.init = 36, .step = 3)
arrivals_models_crossval %>%
model(
ETS_mod = ETS(Arrivals),
Add_Log = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
SNAIVE_mod = SNAIVE(Arrivals),
STL_decomp_log = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust))
) %>%
forecast(h = 3) %>%
accuracy(nz)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Add_Log NZ Test -375. 14347. 11021. 0.200 5.83 0.741 0.746 0.309
## 2 ETS_mod NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 3 SNAIVE_mod NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 STL_decomp_log NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
#The Add_Log is the best model with the lowest RMSE value.
#13.Compare ETS(), SNAIVE() and decomposition_model(STL, ???) on the following five time series.
#You might need to use a Box-Cox transformation for the STL decomposition forecasts.
#Use a test set of three years to decide what gives the best forecasts.
##Beer and bricks production from aus_production.
aus_production %>% autoplot(Beer) +
labs(title = "Australian Beer Production")

beer_f <- aus_production %>%
slice(1:(nrow(aus_production)-12)) %>%
model(
ETS_beer = ETS(Beer),
SNAIVE_beer = SNAIVE(Beer),
STL_Decomp_beer = decomposition_model(STL(log(Beer)),
ETS(season_adjust))
) %>% forecast(h = "3 years")
beer_f %>%
autoplot(filter_index(aus_production, "2000 Q1" ~ .), level = NULL) +
labs(title = "Australian Beer Production")

beer_f %>% accuracy(aus_production)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS_beer Test 0.127 9.62 8.92 0.00998 2.13 0.563 0.489 0.376
## 2 SNAIVE_beer Test -2.92 10.8 9.75 -0.651 2.34 0.616 0.549 0.325
## 3 STL_Decomp_beer Test -2.85 9.87 8.95 -0.719 2.16 0.565 0.502 0.283
aus_bricks <- aus_production %>% filter(!is.na(Bricks)) #drop quarters with missing Bricks
aus_bricks %>% autoplot(Bricks) +
labs(title = "Australian Quarterly Brick Production")

bricks_f <- aus_bricks %>% slice(1:(nrow(aus_bricks)-12)) %>%
model(
ETS = ETS(Bricks),
SNAIVE = SNAIVE(Bricks),
STL_Decomp = decomposition_model(STL(log(Bricks)),
ETS(season_adjust))) %>%
forecast(h = "3 years")
bricks_f %>%
autoplot(filter_index(aus_bricks, "1980 Q1" ~ .), level = NULL) +
labs(title = "Australian Quarterly Brick Production")

bricks_f %>% accuracy(aus_bricks)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 2.27 17.5 13.2 0.474 3.31 0.365 0.354 0.339
## 2 SNAIVE Test 32.6 36.5 32.6 7.85 7.85 0.898 0.739 -0.322
## 3 STL_Decomp Test 9.71 18.7 14.9 2.29 3.65 0.411 0.378 0.0933
##Cost of drug subsidies for diabetes (ATC2 == "A10") and corticosteroids (ATC2 == "H02") from PBS.
drug_subsidies <- PBS %>%
filter(ATC2 %in% c("A10", "H02")) %>%
group_by(ATC2) %>%
summarise(Cost = sum(Cost))
drug_subsidies %>%
autoplot(Cost) + facet_wrap(vars(ATC2), scales = "free_y") +
labs(title = "Diabetes & Corticosteroids Drug Subsidy Costs")

cortico <- drug_subsidies %>%
filter(ATC2 %in% 'H02')
cortico_cost <- cortico %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
cortico_f <-cortico %>% filter(Month < max(Month) - 35) %>%
model(
ETS = ETS(Cost),
SNAIVE = SNAIVE(Cost),
STL_Decomp = decomposition_model(STL(box_cox(log(Cost), cortico_cost)), ETS(season_adjust))
) %>%
forecast(h = "3 years")
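#A sketch: plot and score the corticosteroid forecasts the same way as the beer and bricks
#series above (the held-out last three years of cortico serve as the test set).
cortico_f %>%
  autoplot(filter_index(cortico, "2005 Jan" ~ .), level = NULL) +
  labs(title = "Corticosteroid (H02) Subsidy Cost Forecast Comparisons")
cortico_f %>% accuracy(cortico)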
##Total food retailing turnover for Australia from aus_retail.
View(aus_retail)
aus_food <- aus_retail %>%
filter(Industry=="Food retailing") %>%
summarise(Turnover=sum(Turnover))
aus_food %>%
autoplot(Turnover) +
labs(title = "Australian Food Turnover")

food_turnover <- aus_food %>% features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
aus_food_f <- aus_food %>%
filter(Month < max(Month) - 35) %>%
model(ETS = ETS(Turnover),
SNAIVE = SNAIVE(Turnover),
STL_Decomp = decomposition_model(STL(box_cox(log(Turnover),
food_turnover)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
aus_food_f %>% autoplot(filter_index(aus_food, "2005 Jan" ~ .), level = NULL) +
labs(title = "Australian Food Turnover Forecast Comparisons")

aus_food_f %>% accuracy(aus_food)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test -151. 194. 170. -1.47 1.65 0.639 0.634 0.109
## 2 SNAIVE Test 625. 699. 625. 5.86 5.86 2.35 2.29 0.736
## 3 STL_Decomp Test -8.34 155. 132. -0.135 1.24 0.496 0.506 0.256
#14.a.Use ETS() to select an appropriate model for the following series: total number of trips across Australia using tourism, the closing prices for the four stocks in gafa_stock, and the lynx series in pelt. Does it always give good forecasts?
aus_tourism <- tourism %>%
summarise(Trips = sum(Trips))
tourism_trips <- aus_tourism %>%
model(ETS(Trips))
tourism_trips %>%
forecast() %>%
autoplot(aus_tourism) +
labs(title = "Australian Tourism Forecast")

gafa_stock %>%
autoplot(Close) +
facet_grid(vars(Symbol), scales = "free_y") +
labs(title = "Closing Stock Prices")

gafa_avg <- gafa_stock %>%
group_by(Symbol) %>%
mutate(trading_day = row_number()) %>%
ungroup() %>%
as_tsibble(index = trading_day, regular = TRUE)
gafa_fc <- gafa_avg %>%
model(
ETS(Close)
)
gafa_fc %>%
forecast(h=50) %>%
autoplot(gafa_avg %>% group_by_key() %>% slice((n() - 100):n())) +
labs(title = "Forecasted Closing Stock Prices")
## `mutate_if()` ignored the following grouping variables:
## • Column `Symbol`

#b.Find an example where it does not work well. Can you figure out why?
pelt %>%
model(ETS(Lynx))
## # A mable: 1 x 1
## `ETS(Lynx)`
## <model>
## 1 <ETS(A,N,N)>
pelt %>%
model(ETS(Lynx)) %>% forecast(h = 10) %>%
autoplot(pelt) +
labs(title = "Forecasted Pelt Trades")

#ETS() does not always give good forecasts: for the pelt data it selects ETS(A,N,N), whose
#flat forecasts give no indication of the strong cyclic behaviour of future pelt trades.
#15.Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.
usdeaths.ets2 <- ets(usdeaths,model = "MAM")
usdeaths.ets2fc <- forecast(usdeaths.ets2, h=1)
usdeaths.ets2fc$mean
## Jan
## 1979 8233.107
usdeaths.hwM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.hwM$mean
## Jan
## 1979 8217.64
#The point forecasts from an ETS(M,A,M) model use exactly Holt-Winters' multiplicative
#equations, so with identical parameter values they coincide; the small gap here (8233 vs
#8218) only reflects the two functions arriving at slightly different parameter estimates.
#16.Show that the forecast variance for an ETS(A,N,N) model is given by σ^2[1+α^2(h−1)]
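#A short derivation (sketch): for ETS(A,N,N), y_{T+h} = l_{T+h-1} + e_{T+h} and
#l_t = l_{t-1} + alpha*e_t, so
#   y_{T+h} = l_T + alpha*(e_{T+1} + ... + e_{T+h-1}) + e_{T+h}.
#With independent errors of variance sigma^2, the forecast variance is
#   Var(y_{T+h} | l_T) = sigma^2 * (1 + alpha^2 * (h - 1)).
#The code below checks this numerically for h = 1 against the interval reported by R.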
ibm_ets <- ets(ibmclose, model = "ANN")
ibm_ets_fc <- forecast(ibm_ets, 1)
ibm_ets_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 347.6907 366.3083 342.7629 371.2361
ibm_sig <- 7.2637    #sigma (residual sd) from the fitted ETS(A,N,N) model
alpha <- 0.9999      #smoothing parameter from the fitted model
h <- 1
conf_int <- 1.96 * ibm_sig * sqrt(1 + alpha^2 * (h - 1))
ibm_ets_fc$mean[1]
## [1] 356.9995
#356.9995
ibm_ets_fc$mean[1] - conf_int
## [1] 342.7626
#342.7626, which matches R's 95% lower bound below up to rounding of sigma
ibm_ets_fc$lower[1, '95%']
## 95%
## 342.7629
#342.7629
#17.Write down 95% prediction intervals for an ETS(A,N,N) model
#as a function of ℓT,α,h and σ, assuming normally distributed errors.
#yhat_{T+h|T} = l_T, so the 95% interval is l_T ± 1.96 * sigma * sqrt(1 + alpha^2 * (h - 1))
###################Chapter 9 ARIMA Models
#1a.All figures show white noise, but with different critical value bounds.
#b.The critical values are at different distances from zero because the data sets have
#different numbers of observations: the bounds are roughly ±1.96/sqrt(T), so the more
#observations, the narrower the bounds and the smaller the sample autocorrelations appear.
#2.A classic example of a non-stationary series is stock prices. Plot the daily
#closing prices for Amazon stock (contained in gafa_stock),along with the ACF and PACF.
#Explain how each plot shows that the series is non-stationary and should be differenced.
gafa_stock %>%
filter(Symbol=="AMZN")%>%
autoplot(Close)+labs(y="$", title="Amazon CLOSE")

gafa_stock %>%
filter(Symbol=="AMZN")%>%
ACF(Close)%>%autoplot()
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

#The ACF decays very slowly and every lag shows a correlation significantly different from
#zero, with no seasonal pattern: the series is non-stationary and should be differenced.
gafa_stock %>%
filter(Symbol=="AMZN")%>%
PACF(Close)%>%autoplot()
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

#The PACF also shows a very large spike at lag 1 (close to 1), again pointing to
#non-stationarity; the series should be differenced.
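#A sketch: after one difference the series looks much closer to white noise, which is what
#the ACF/PACF above are pointing to.
gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  mutate(diff_close = difference(Close)) %>%
  autoplot(diff_close) +
  labs(y = "$", title = "Daily change in Amazon closing price")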
#3.For the following series, find an appropriate Box-Cox transformation and order
#of differencing in order to obtain stationary data.
#a.Turkish GDP from global_economy.
Turkey<-global_economy %>%
filter(Country=="Turkey")
Turkey%>%
autoplot(GDP)+labs(y="$", title="TURKEY GDP")

lambda<-Turkey%>%
features(GDP,features=guerrero)%>%
pull(lambda_guerrero)
lambda
## [1] 0.1572187
Turkey%>% mutate(newG=GDP^lambda)
## # A tsibble: 58 x 10 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population newG
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Turkey TUR 1960 1.40e10 NA 5.40e-5 3.67 2.06 27472331 39.4
## 2 Turkey TUR 1961 8.02e 9 1.16 5.57e-5 6.79 5.12 28146893 36.1
## 3 Turkey TUR 1962 8.92e 9 5.57 5.78e-5 7.97 5.60 28832805 36.7
## 4 Turkey TUR 1963 1.04e10 9.07 6.15e-5 6.97 4.18 29531342 37.5
## 5 Turkey TUR 1964 1.12e10 5.46 6.22e-5 5.47 4.47 30244232 38.0
## 6 Turkey TUR 1965 1.19e10 2.82 6.50e-5 5.40 4.56 30972965 38.4
## 7 Turkey TUR 1966 1.41e10 11.2 7.05e-5 5.66 4.09 31717477 39.4
## 8 Turkey TUR 1967 1.57e10 4.73 8.04e-5 4.96 4.11 32477961 40.1
## 9 Turkey TUR 1968 1.75e10 6.78 8.53e-5 5.08 3.68 33256432 40.8
## 10 Turkey TUR 1969 1.95e10 4.08 8.95e-5 4.74 3.60 34055361 41.5
## # … with 48 more rows
Turkey%>%
autoplot(box_cox(GDP, lambda))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed GDP with $$\\lambda$ =",
round(lambda,2))))

Turkey<-Turkey%>% mutate(newG=GDP^lambda)
Turkey%>%autoplot(newG)

Turkey%>% features(newG,ljung_box, lag=10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 341. 0
Turkey %>%
mutate(diff_close = difference(newG)) %>%
features(diff_close, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 5.84 0.829
Turkey%>%
ACF(difference(newG))%>%autoplot()

Turkey%>%
PACF(difference(newG))%>%
autoplot()

#After one difference of the transformed series, the ACF and PACF spikes are essentially all
#within the critical bounds, so a single first difference is sufficient here.
#b.Accommodation takings in the state of Tasmania from aus_accommodation.
Tasmania<-aus_accommodation %>%
filter(State=="Tasmania")
Tasmania%>%
autoplot(Takings)+labs(y="$", title="Tasmania Accommodation Takings")

lambda1<-Tasmania%>%
features(Takings,features=guerrero)%>%
pull(lambda_guerrero)
Tasmania<-Tasmania%>% mutate(newT=Takings^lambda1)
Tasmania%>%
autoplot(box_cox(Takings, lambda1))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Takings with $$\\lambda1$ =",
round(lambda1,2))))

Tasmania%>%autoplot(newT)

Tasmania%>% features(newT,unitroot_kpss)
## # A tibble: 1 × 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 1.83 0.01
#1st diff
Tasmania %>%
mutate(diff_Taking = difference(newT))%>%
features(diff_Taking, unitroot_kpss)
## # A tibble: 1 × 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 0.256 0.1
Tasmania<-Tasmania %>%
mutate(diff_Taking = difference(newT))
Tasmania %>%
features(diff_Taking, unitroot_ndiffs)
## # A tibble: 1 × 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 0
Tasmania%>%
ACF((diff_Taking))%>%autoplot()

Tasmania%>%
PACF((diff_Taking))%>%
autoplot()

Tasmania %>%
features(diff_Taking, unitroot_nsdiffs)
## # A tibble: 1 × 2
## State nsdiffs
## <chr> <int>
## 1 Tasmania 1
#suggests 1 seasonal diff
Tasmania<-Tasmania %>%
mutate(diff_Taking2 = difference(diff_Taking,4)) #quarterly data, so the seasonal lag is 4
Tasmania%>%
ACF((diff_Taking2))%>%
autoplot()

Tasmania%>%
PACF((diff_Taking2))%>%
autoplot()

Tasmania%>% autoplot(diff_Taking2)
## Warning: Removed 5 row(s) containing missing values (geom_path).

#c.Monthly sales from souvenirs.
souvenirs %>%
autoplot(Sales)+labs(y="$", title="Souvenir sales")

lambda2<-souvenirs%>%
features(Sales,features=guerrero)%>%
pull(lambda_guerrero)
SouvSales<-souvenirs%>% mutate(newS=Sales^lambda2)
SouvSales%>%
autoplot(box_cox(Sales, lambda2))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Sales with $$\\lambda2$ =",
round(lambda2,2))))

SouvSales%>%autoplot(newS)

SouvSales%>% features(newS,unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.79 0.01
SouvSales %>%
mutate(diff_Sales = difference(newS))%>%
features(diff_Sales, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0631 0.1
#1st diff
SouvSales <-SouvSales %>%
mutate(diff_Sales = difference(newS))
SouvSales %>%
features(diff_Sales, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 0
SouvSales%>%
ACF((diff_Sales))%>%autoplot()

SouvSales%>%
PACF((diff_Sales))%>%
autoplot()

SouvSales %>%
features(diff_Sales, unitroot_nsdiffs)
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
#1 seasonal diff
SouvSales<-SouvSales %>%
mutate(diff_Sales2 = difference(diff_Sales,12))
SouvSales%>%
ACF((diff_Sales2))%>%
autoplot()

SouvSales%>%
PACF((diff_Sales2))%>%
autoplot()

SouvSales%>% autoplot(diff_Sales2)
## Warning: Removed 13 row(s) containing missing values (geom_path).

#4.For the souvenirs data, write down the differences you chose above using backshift operator notation.
#The Sales series was Box-Cox transformed, then first differenced and seasonally differenced
#at lag 12. In backshift notation the differencing is (1 - B)(1 - B^12) w_t, where w_t is the
#Box-Cox transformed series. The result is much closer to stationary, although a few ACF/PACF
#spikes still exceed the bounds, so a seasonal ARIMA model would still be needed to capture them.
#5.For your retail data (from Exercise 8 in Section 2.10), find the appropriate
#order of differencing (after transformation if necessary) to obtain stationary data.
aus_retail %>% distinct(Industry)
## # A tibble: 20 × 1
## Industry
## <chr>
## 1 Cafes, restaurants and catering services
## 2 Cafes, restaurants and takeaway food services
## 3 Clothing retailing
## 4 Clothing, footwear and personal accessory retailing
## 5 Department stores
## 6 Electrical and electronic goods retailing
## 7 Food retailing
## 8 Footwear and other personal accessory retailing
## 9 Furniture, floor coverings, houseware and textile goods retailing
## 10 Hardware, building and garden supplies retailing
## 11 Household goods retailing
## 12 Liquor retailing
## 13 Newspaper and book retailing
## 14 Other recreational goods retailing
## 15 Other retailing
## 16 Other retailing n.e.c.
## 17 Other specialised food retailing
## 18 Pharmaceutical, cosmetic and toiletry goods retailing
## 19 Supermarket and grocery stores
## 20 Takeaway food services
set.seed(1111111)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% distinct(Industry)
## # A tibble: 1 × 1
## Industry
## <chr>
## 1 Newspaper and book retailing
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A 1982 Apr 9.7
## 2 Western Australia Newspaper and book retailing A3349822A 1982 May 11
## 3 Western Australia Newspaper and book retailing A3349822A 1982 Jun 10.7
## 4 Western Australia Newspaper and book retailing A3349822A 1982 Jul 9
## 5 Western Australia Newspaper and book retailing A3349822A 1982 Aug 9.1
## 6 Western Australia Newspaper and book retailing A3349822A 1982 Sep 10
str(myseries)
## tbl_ts [441 × 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ State : chr [1:441] "Western Australia" "Western Australia" "Western Australia" "Western Australia" ...
## $ Industry : chr [1:441] "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" ...
## $ Series ID: chr [1:441] "A3349822A" "A3349822A" "A3349822A" "A3349822A" ...
## $ Month : mth [1:441] 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 Sep...
## $ Turnover : num [1:441] 9.7 11 10.7 9 9.1 10 7.7 8.4 11.8 7.4 ...
## - attr(*, "key")= tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
## ..$ State : chr "Western Australia"
## ..$ Industry: chr "Newspaper and book retailing"
## ..$ .rows : list<int> [1:1]
## .. ..$ : int [1:441] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "index")= chr "Month"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Month"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
autoplot(myseries,.vars=Turnover)

gg_season(myseries,y=Turnover)

gg_subseries(myseries,y=Turnover)

gg_lag(myseries,y=Turnover)

lambda3<-myseries%>%
features(Turnover,features=guerrero)%>%
pull(lambda_guerrero)
myseries<-myseries%>% mutate(newT=Turnover^lambda3)
myseries%>%
autoplot(box_cox(Turnover, lambda3))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Turnover with $$\\lambda3$ =",
round(lambda3,2))))

myseries%>%autoplot(newT)

myseries%>% features(newT,unitroot_kpss)
## # A tibble: 1 × 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 Western Australia Newspaper and book retailing 6.05 0.01
myseries %>%
mutate(diff_Turn = difference(newT))%>%
features(diff_Turn, unitroot_kpss)
## # A tibble: 1 × 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 Western Australia Newspaper and book retailing 0.0187 0.1
#1st diff
myseries <-myseries %>%
mutate(diff_Turn = difference(newT))
myseries %>%
features(diff_Turn, unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Western Australia Newspaper and book retailing 0
#ACF and PACF
myseries%>%
ACF((diff_Turn))%>%autoplot()

myseries%>%
PACF((diff_Turn))%>%
autoplot()

myseries %>%
features(diff_Turn, unitroot_nsdiffs)
## # A tibble: 1 × 3
## State Industry nsdiffs
## <chr> <chr> <int>
## 1 Western Australia Newspaper and book retailing 1
#suggests taking 1 seasonal difference
myseries<-myseries %>%
mutate(diff_Turn2 = difference(diff_Turn,12))
myseries%>%
ACF((diff_Turn2))%>%
autoplot()

myseries%>%
PACF((diff_Turn2))%>%
autoplot()

myseries%>% autoplot(diff_Turn2)
## Warning: Removed 13 row(s) containing missing values (geom_path).

#6.Simulate and plot some data from simple ARIMA models.
#a.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
#b.
sim%>%
autoplot(y)

set.seed(6543221)
y2 <- ts(numeric(100))
y3 <- ts(numeric(100))
e2 <- rnorm(100)
e3 <- rnorm(100)
for(i in 2:100){
y2[i] <- 0.1*y2[i-1] + e2[i]
y3[i] <- 0.9*y3[i-1] + e3[i]
}
sim <- tsibble(idx = seq_len(100), y = y, y2 = y2, y3 = y3, index = idx)
plot1 <- sim %>% autoplot(y) + labs( title = "phi = 0.6")
plot2 <- sim %>% autoplot(y2) + labs( title = "phi = 0.1")
plot3 <- sim %>% autoplot(y3) + labs( title = "phi = 0.9")
plot1

plot2

plot3

#c.
set.seed(12345566)
y4 <- numeric(100)
e4 <- rnorm(100)
for(i in 2:100)
y4[i] <- 0.6*e4[i-1] + e4[i]
sim <- tsibble(idx = seq_len(100), y4 = y4, index = idx)
#d.
sim %>% autoplot(y4) + labs(title="MA(1) 0.6 ")

set.seed(55667788)
y5 <- ts(numeric(100))
e5 <- rnorm(100)
y6 <- ts(numeric(100))
e6 <- rnorm(100)
for(i in 2:100){
y5[i] <- 0.1*e5[i-1] + e5[i]
y6[i] <- 0.9*e6[i-1] + e6[i]
}
sim <- tsibble(idx = seq_len(100), y4 = y4, y5 = y5, y6 = y6, index = idx)
plot1a <- sim %>% autoplot(y4) + labs( title = "theta = 0.6")
plot2a <- sim %>% autoplot(y5) + labs( title = "theta = 0.1")
plot3a <- sim %>% autoplot(y6) + labs( title = "theta = 0.9")
plot1a

plot2a

plot3a

#e.
set.seed(222222)
a7 <- ts(numeric(100))
e7 <- rnorm(100)
for(i in 2:100){
a7[i] <- 0.6*a7[i-1] + 0.6*e7[i-1] + e7[i]
}
sim7 <- tsibble(idx = seq_len(100), a7 = a7, index = idx)
plot7 <- sim7 %>% autoplot(a7) + labs( title = "ARMA(1,1) theta=.6, phi=.6")
plot7

#f.
set.seed(333333)
y8 <- ts(numeric(100))
e8 <- rnorm(100)
for(i in 3:100){
y8[i] <- -0.8*y8[i-1] + 0.3*y8[i-2] + e8[i]
}
sim8 <- tsibble(idx = seq_len(100), y8 = y8, index = idx)
plot8 <- sim8 %>% autoplot(y8) + labs( title = "AR(2) phi=-.8,.3")
plot8

#g.
plot7

plot8

#The ARMA(1,1) series (phi = 0.6, theta = 0.6) is stationary and fluctuates around zero,
#while the AR(2) series with phi1 = -0.8, phi2 = 0.3 violates the stationarity conditions
#(phi2 - phi1 > 1), so it oscillates with ever-increasing amplitude.

#7.Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
View(aus_airpassengers)
autoplot(aus_airpassengers)
## Plot variable not specified, automatically selected `.vars = Passengers`

#a.Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
fit<-aus_airpassengers %>%
model(ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
#ARIMA(0,2,1)
fit2<-aus_airpassengers %>%
model(arima021 =ARIMA(Passengers ~ pdq(0,2,1)))
fit2 %>%
select(arima021) %>%
gg_tsresiduals()

augment(fit2) %>% features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima021 6.70 0.754
fit2 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#b.Write the model in terms of the backshift operator.
#(1 - B)^2 y_t = (1 + theta1 B) e_t, with theta1 = -0.8963
#c.Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
fit3<-aus_airpassengers %>%
model(arima010 =ARIMA(Passengers ~ 1 + pdq(0,1,0)))
report(fit3)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
fit3 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#d.Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
fit4<-aus_airpassengers %>%
model(arima212 =ARIMA(Passengers ~ 1 + pdq(2,1,2)))
report(fit4)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
fit4 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#Without constant
fit5<-aus_airpassengers %>%
model(arima212n =ARIMA(Passengers ~ 0+pdq(2,1,2)))
## Warning: 1 error encountered for arima212n
## [1] non-stationary AR part from CSS
report(fit5)
## Series: Passengers
## Model: NULL model
## NULL model
fit5 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 row(s) containing missing values (geom_path).

#e.Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
fit6<-aus_airpassengers %>%
model(arima021 =ARIMA(Passengers ~ 1 +pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(fit6)
## Series: Passengers
## Model: ARIMA(0,2,1) w/ poly
##
## Coefficients:
## ma1 constant
## -1.0000 0.0571
## s.e. 0.0585 0.0213
##
## sigma^2 estimated as 3.855: log likelihood=-95.1
## AIC=196.21 AICc=196.79 BIC=201.63
fit6 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#The forecasts still trend upward, but with two differences the constant induces a quadratic
#trend, so the ARIMA(0,2,1) with a constant gives the steepest increase (which is why R warns
#against including it).
#8.For the United States GDP series (from global_economy):
US<-global_economy %>%
filter(Country =="United States")
US %>%
autoplot(GDP) +
labs(y = "GDP", title = "United States GDP")

#a.if necessary, find a suitable Box-Cox transformation for the data;
lambda_U<-US%>%
features(GDP,features=guerrero)%>%
pull(lambda_guerrero)
lambda_U
## [1] 0.2819443
US<-US%>% mutate(GDP_U=GDP^lambda_U)
US%>%
autoplot(box_cox(GDP, lambda_U))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Sales with $$\\lambda_U$ =",
round(lambda_U,2))))

US%>%autoplot(GDP_U)

#b.fit a suitable ARIMA model to the transformed data using ARIMA();
fit_U<-
US %>%
model(ARIMA(GDP_U))
report(fit_U)
## Series: GDP_U
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.4586 33.3208
## s.e. 0.1198 2.6798
##
## sigma^2 estimated as 435.6: log likelihood=-253.16
## AIC=512.32 AICc=512.77 BIC=518.45
#c.try some other plausible models by experimenting with the orders chosen;
US%>%
features(GDP_U,unitroot_kpss)
## # A tibble: 1 × 3
## Country kpss_stat kpss_pvalue
## <fct> <dbl> <dbl>
## 1 United States 1.55 0.01
US%>%
features(GDP_U,unitroot_ndiffs)
## # A tibble: 1 × 2
## Country ndiffs
## <fct> <int>
## 1 United States 1
US%>%
features(GDP_U,unitroot_nsdiffs)
## # A tibble: 1 × 2
## Country nsdiffs
## <fct> <int>
## 1 United States 0
USFit<-US%>%
model(arima110= ARIMA(GDP_U ~pdq(1,1,0)),
arima120= ARIMA(GDP_U ~pdq(1,2,0)),
arima111= ARIMA(GDP_U ~pdq(1,1,1)),
stepwise = ARIMA(GDP_U),
serach = ARIMA(GDP_U, stepwise=FALSE))
USFit %>%
pivot_longer(!Country, names_to ="Model name", values_to ="Orders")
## # A mable: 5 x 3
## # Key: Country, Model name [5]
## Country `Model name` Orders
## <fct> <chr> <model>
## 1 United States arima110 <ARIMA(1,1,0) w/ drift>
## 2 United States arima120 <ARIMA(1,2,0)>
## 3 United States arima111 <ARIMA(1,1,1) w/ drift>
## 4 United States stepwise <ARIMA(1,1,0) w/ drift>
## 5 United States serach <ARIMA(1,1,0) w/ drift>
glance(USFit) %>%
arrange (AICc) %>%
select(.model:BIC)
## # A tibble: 5 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 436. -253. 512. 513. 518.
## 2 stepwise 436. -253. 512. 513. 518.
## 3 serach 436. -253. 512. 513. 518.
## 4 arima120 539. -255. 514. 514. 518.
## 5 arima111 444. -253. 514. 515. 522.
#ARIMA (1,1,0) has the lowest AICc
#d.choose what you think is the best model and check the residual diagnostics;
US%>%
gg_tsdisplay(difference(GDP_U), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

augment(USFit)%>%
filter(.model=='arima110')%>%
features(.innov, ljung_box, lag=10, dof=3)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States arima110 3.81 0.801
#e.produce forecasts of your fitted model. Do the forecasts look reasonable?
USFit %>%
forecast(h=5) %>%
filter(.model=='arima110') %>%
autoplot(US)

#f.compare the results with what you would obtain using ETS() (with no transformation).
us_ets<-US %>%
model(ETS(GDP))
fc<- us_ets %>%
forecast(h=10) %>%
autoplot(US)
fc

report(us_ets)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089
glance(USFit) %>%
arrange (AICc) %>%
select(.model:BIC)
## # A tibble: 5 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 436. -253. 512. 513. 518.
## 2 stepwise 436. -253. 512. 513. 518.
## 3 serach 436. -253. 512. 513. 518.
## 4 arima120 539. -255. 514. 514. 518.
## 5 arima111 444. -253. 514. 515. 522.
#Among the ARIMA candidates, ARIMA(1,1,0) with drift has the lowest AICc and BIC. The ETS
#AICc (3191) is not directly comparable because it is fitted to the untransformed GDP, so a
#fair comparison would use test-set or cross-validated accuracy.
#9.Consider aus_arrivals, the quarterly number of international visitors to Australia from several countries for the period 1981 Q1 – 2012 Q3.
View(aus_arrivals)
#a.Select one country and describe the time plot.
japan<-aus_arrivals%>%
filter(Origin=='Japan')
autoplot(japan,Arrivals)+
labs(title="Japan Arrivals")

#b.Use differencing to obtain stationary data.
japan %>%
mutate(diff_Arri=difference(Arrivals))%>%
features(diff_Arri,ljung_box,lag=10)
## # A tibble: 1 × 3
## Origin lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Japan 346. 0
#test stat = 346, p-value = 0: the first-differenced series is still strongly autocorrelated
#(a seasonal pattern remains), so a seasonal difference is needed as well.
#c.What can you learn from the ACF graph of the differenced data?
japan %>% ACF(Arrivals) %>%
autoplot() +labs(subtitle="Japan Arrivals")

#The ACF of the raw series decays very slowly, confirming the series is non-stationary; the
#ACF of the differenced series below still shows large spikes at the seasonal lags.
japan %>% ACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle="Changes in Japan Arrivals")

#d.What can you learn from the PACF graph of the differenced data?
japan %>% PACF(Arrivals) %>%
autoplot() +labs(subtitle="Japan Arrivals")

japan %>% PACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle="Changes in Japan Arrivals")

#e.What model do these graphs suggest?
#Several ACF/PACF spikes lie beyond the critical bounds, including at the seasonal lags,
#which suggests a seasonal ARIMA model with both an ordinary and a seasonal difference.
#f. Does ARIMA() give the same model that you chose? If not, which model do you think is better?
auto.arima(japan)
## Series: japan
## ARIMA(0,1,1)(1,1,1)[4]
##
## Coefficients:
## ma1 sar1 sma1
## -0.4228 -0.2305 -0.5267
## s.e. 0.0944 0.1337 0.1246
##
## sigma^2 estimated as 174801727: log likelihood=-1330.66
## AIC=2669.32 AICc=2669.66 BIC=2680.54
#g. Write the model in terms of the backshift operator, then without using the backshift operator.
#With the ARIMA(0,1,1)(1,1,1)[4] model selected above:
#(1 - Phi1 B^4)(1 - B)(1 - B^4) y_t = (1 + theta1 B)(1 + Theta1 B^4) e_t
#Expanding (without the backshift operator):
#y_t = y_{t-1} + (1 + Phi1) y_{t-4} - (1 + Phi1) y_{t-5} - Phi1 y_{t-8} + Phi1 y_{t-9}
#      + e_t + theta1 e_{t-1} + Theta1 e_{t-4} + theta1 Theta1 e_{t-5}
#10.Choose a series from us_employment, the total employment in different industries in the United States.
#a.Produce an STL decomposition of the data and describe the trend and seasonality.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
model(STL(Employed)) %>%
components() %>% autoplot()

#b.Do the data need transforming? If so, find a suitable transformation.
#Yes: the seasonal variation grows with the level of the series, so a log (or Box-Cox)
#transformation stabilises it.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
model(STL(log(Employed))) %>%
components() %>% autoplot()

#c.Are the data stationary? If not, find an appropriate differencing which yields stationary data.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
autoplot(Employed)

us_employment %>% filter(Title == "Leisure and Hospitality") %>%
features(Employed, unitroot_kpss)
## # A tibble: 1 × 3
## Series_ID kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 CEU7000000001 11.9 0.01
#test stat=11.9, p-value=0.01, so the data are not stationary
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(diff_employed = difference(Employed)) %>%
features(diff_employed, unitroot_kpss)
## # A tibble: 1 × 3
## Series_ID kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 CEU7000000001 0.161 0.1
#test stat=0.161, p-value=0.1, so the differenced data are stationary
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
features(Employed, unitroot_ndiffs)
## # A tibble: 1 × 2
## Series_ID ndiffs
## <chr> <int>
## 1 CEU7000000001 1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(log_employed = log(Employed)) %>%
features(log_employed, unitroot_nsdiffs)
## # A tibble: 1 × 2
## Series_ID nsdiffs
## <chr> <int>
## 1 CEU7000000001 1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(log_employed = difference(log(Employed), 12)) %>%
features(log_employed, unitroot_ndiffs)
## # A tibble: 1 × 2
## Series_ID ndiffs
## <chr> <int>
## 1 CEU7000000001 1
#d.Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AICc values?
us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
gg_tsdisplay(difference(Employed), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

us_fit <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(arima310 = ARIMA(Employed ~ pdq(3,1,0)),
arima210 = ARIMA(Employed ~ pdq(2,1,0)),
stepwise = ARIMA(Employed),
auto = ARIMA(Employed, stepwise=FALSE))
report (us_fit)
## Warning in report.mdl_df(us_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 9
## Series_ID .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 CEU7000000001 arima310 1160. -4728. 9467. 9467. 9497. <cpl [3]> <cpl [24]>
## 2 CEU7000000001 arima210 1161. -4729. 9468. 9468. 9492. <cpl [2]> <cpl [24]>
## 3 CEU7000000001 stepwise 1104. -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
## 4 CEU7000000001 auto 1104. -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
#e.Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
fit2 <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(ARIMA(Employed)) %>%
report(fit2)
## Series: Employed
## Model: ARIMA(2,1,2)(0,1,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 sma2
## 1.6621 -0.9333 -1.5105 0.7822 -0.4322 -0.1297
## s.e. 0.0327 0.0299 0.0585 0.0489 0.0342 0.0359
##
## sigma^2 estimated as 1104: log likelihood=-4704.55
## AIC=9423.1 AICc=9423.22 BIC=9457.14
fit2 %>% gg_tsresiduals()

augment(fit2) %>% features(.innov, ljung_box, lag=10, dof=2)
## # A tibble: 1 × 4
## Series_ID .model lb_stat lb_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 CEU7000000001 ARIMA(Employed) 21.0 0.00724
#p-value = 0.00724 < 0.05, so the residuals are not white noise: some autocorrelation
#remains, and a model with more terms could be tried.
#f.Forecast the next 3 years of data. Get the latest figures from https://fred.stlouisfed.org/categories/11 to check the accuracy of your forecasts.
ARIMA <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(ARIMA(Employed))
ARIMA %>%
forecast(h = "3 years") %>%
autoplot(us_employment, level = NULL)

#g.Eventually, the prediction intervals are so wide that the forecasts are not particularly useful. How many years of forecasts do you think are sufficiently accurate to be usable?
PI <- ARIMA %>%
forecast(h = "3 years") %>%
mutate(PI = hilo(Employed, level = 95))
PI
## # A fable: 36 x 6 [1M]
## # Key: Series_ID, .model [1]
## Series_ID .model Month Employed .mean PI
## <chr> <chr> <mth> <dist> <dbl> <hilo>
## 1 CEU7000000001 ARIMA(E… 2019 Oct N(16717, 1104) 16717. [16652.22, 16782.49]95
## 2 CEU7000000001 ARIMA(E… 2019 Nov N(16519, 2569) 16519. [16419.55, 16618.24]95
## 3 CEU7000000001 ARIMA(E… 2019 Dec N(16536, 4302) 16536. [16407.56, 16664.67]95
## 4 CEU7000000001 ARIMA(E… 2020 Jan N(16206, 6108) 16206. [16053.25, 16359.61]95
## 5 CEU7000000001 ARIMA(E… 2020 Feb N(16347, 7774) 16347. [16173.82, 16519.45]95
## 6 CEU7000000001 ARIMA(E… 2020 Mar N(16606, 9159) 16606. [16418.33, 16793.48]95
## 7 CEU7000000001 ARIMA(E… 2020 Apr N(16928, 10234) 16928. [16729.77, 17126.31]95
## 8 CEU7000000001 ARIMA(E… 2020 May N(17309, 11062) 17309. [17103.16, 17515.45]95
## 9 CEU7000000001 ARIMA(E… 2020 Jun N(17746, 11753) 17746. [17533.32, 17958.29]95
## 10 CEU7000000001 ARIMA(E… 2020 Jul N(17815, 12422) 17815. [17596.34, 18033.23]95
## # … with 26 more rows
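#A sketch for judging how quickly the intervals widen: extend the horizon well past three
#years and plot the intervals.
ARIMA %>%
  forecast(h = "10 years") %>%
  autoplot(us_employment %>% filter(Title == "Leisure and Hospitality")) +
  labs(title = "Leisure and Hospitality: 10-year ARIMA forecast")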
#11.Choose one of the following seasonal time series: the Australian production of electricity, cement, or gas (from aus_production).
View(aus_production)
gas<-aus_production%>%
select(Quarter,Gas)
#a.Do the data need transforming? If so, find a suitable transformation.
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

#The series trends upward and the seasonal variation increases with the level, so a Box-Cox
#transformation is needed.
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))

#b.Are the data stationary? If not, find an appropriate differencing which yields stationary data.
aus_production %>%
gg_tsdisplay(box_cox(Gas, lambda) %>% difference(4), plot_type =
'partial')
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

aus_production %>%
gg_tsdisplay(box_cox(Gas, lambda) %>% difference(4) %>%
difference(1), plot_type = 'partial')
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

#c.Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
gas_fit <- aus_production %>%
select(Gas) %>%
model(arima011 = ARIMA(Gas ~ pdq(0,1,1)),
arima012 = ARIMA(Gas ~ pdq(0,1,2)),
stepwise = ARIMA(Gas),
auto = ARIMA(Gas, stepwise=FALSE))
report (gas_fit)
## Warning in report.mdl_df(gas_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima011 20.8 -625. 1257. 1257. 1271. <cpl [0]> <cpl [9]>
## 2 arima012 20.6 -624. 1256. 1256. 1269. <cpl [0]> <cpl [6]>
## 3 stepwise 18.7 -616. 1243. 1243. 1260. <cpl [0]> <cpl [3]>
## 4 auto 18.7 -616. 1243. 1243. 1260. <cpl [0]> <cpl [3]>
#The automatically selected model (stepwise/auto) has the lowest AICc (about 1243); of the
#two manually specified models, ARIMA(0,1,2) is slightly better than ARIMA(0,1,1).
#d.Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
gas_fit %>% select(auto) %>% augment() %>%
features(.resid, ljung_box, dof = 6, lag = 12)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 auto 11.0 0.0884
#e.Forecast the next 24 months of data using your preferred model.
gas_fit %>%
select(arima012) %>%
forecast(h = "2 years") %>%
autoplot(aus_production)

#f.Compare the forecasts obtained using ETS().
aus_production %>%
model(ETS(Gas)) %>%
forecast(h = "2 years") %>%
autoplot(aus_production) +
autolayer(gas_fit %>% select(arima012) %>% forecast(h = "2 years"))

#12.For the same time series you used in the previous exercise, try using a non-seasonal
#model applied to the seasonally adjusted data obtained from STL.
#Compare the forecasts with those obtained in the previous exercise.
#Which do you think is the best approach?
gas_ts<-ts(gas$Gas,start=c(1956,1),frequency=4)
gas_stl <- stl(gas_ts,s.window='periodic')
gas_stl %>% autoplot()
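#A sketch of the requested comparison using the fable tools from the previous exercise:
#an STL decomposition of the Box-Cox transformed gas data with a non-seasonal ARIMA fitted
#to the seasonally adjusted series, plotted against the seasonal ARIMA(0,1,2) forecasts.
aus_production %>%
  model(
    stl_arima = decomposition_model(
      STL(box_cox(Gas, lambda)),
      ARIMA(season_adjust ~ PDQ(0, 0, 0))  #non-seasonal model on the adjusted series
    )
  ) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_production) +
  autolayer(gas_fit %>% select(arima012) %>% forecast(h = "2 years")) +
  labs(title = "Gas: STL + non-seasonal ARIMA vs seasonal ARIMA(0,1,2)")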

#13.For the Australian tourism data (from tourism):
#a.Fit ARIMA models for each time series.
fit <- tourism %>%
model(
snaive = SNAIVE(Trips ~ lag("year")),
ets = ETS(Trips),
arima = ARIMA(Trips)
)
## Warning in sqrt(diag(best$var.coef)): NaNs produced
fit
## # A mable: 304 x 6
## # Key: Region, State, Purpose [304]
## Region State Purpose snaive ets arima
## <chr> <chr> <chr> <model> <model> <model>
## 1 Adelaide Sout… Busine… <SNAIVE> <ETS(M,N,M)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 2 Adelaide Sout… Holiday <SNAIVE> <ETS(A,N,A)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 3 Adelaide Sout… Other <SNAIVE> <ETS(M,A,N)> <ARIMA(0,1,1) w/ drift>
## 4 Adelaide Sout… Visiti… <SNAIVE> <ETS(A,N,A)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 5 Adelaid… Sout… Busine… <SNAIVE> <ETS(A,N,N)> <ARIMA(0,0,0) w/ mean>
## 6 Adelaid… Sout… Holiday <SNAIVE> <ETS(A,A,N)> <ARIMA(0,1,1)>
## 7 Adelaid… Sout… Other <SNAIVE> <ETS(A,N,N)> <ARIMA(0,1,2)(0,0,2)[4]>
## 8 Adelaid… Sout… Visiti… <SNAIVE> <ETS(M,A,M)> <ARIMA(0,1,1)>
## 9 Alice S… Nort… Busine… <SNAIVE> <ETS(M,N,M)> <ARIMA(0,1,1)(0,0,1)[4]>
## 10 Alice S… Nort… Holiday <SNAIVE> <ETS(M,N,A)> <ARIMA(0,0,0)(0,1,2)[4]>
## # … with 294 more rows
#b.Produce forecasts of your fitted models.
fc <- forecast(fit)
fc
## # A fable: 7,296 x 7 [1Q]
## # Key: Region, State, Purpose, .model [912]
## Region State Purpose .model Quarter Trips .mean
## <chr> <chr> <chr> <chr> <qtr> <dist> <dbl>
## 1 Adelaide South Australia Business snaive 2018 Q1 N(129, 2018) 129.
## 2 Adelaide South Australia Business snaive 2018 Q2 N(174, 2018) 174.
## 3 Adelaide South Australia Business snaive 2018 Q3 N(185, 2018) 185.
## 4 Adelaide South Australia Business snaive 2018 Q4 N(197, 2018) 197.
## 5 Adelaide South Australia Business snaive 2019 Q1 N(129, 4036) 129.
## 6 Adelaide South Australia Business snaive 2019 Q2 N(174, 4036) 174.
## 7 Adelaide South Australia Business snaive 2019 Q3 N(185, 4036) 185.
## 8 Adelaide South Australia Business snaive 2019 Q4 N(197, 4036) 197.
## 9 Adelaide South Australia Business ets 2018 Q1 N(149, 1022) 149.
## 10 Adelaide South Australia Business ets 2018 Q2 N(173, 1406) 173.
## # … with 7,286 more rows
#c.Check the forecasts for the “Snowy Mountains” and “Melbourne” regions. Do they look reasonable?
fc %>%
filter(Region == "Snowy Mountains") %>%
autoplot(tourism)

fc %>%
filter(Region == "Melbourne") %>%
autoplot(tourism)

#14.For your retail time series (Exercise 5 above):
#a.develop an appropriate seasonal ARIMA model;
retail_ts<-ts(aus_retail$Turnover,start=c(1982,4),frequency=12)
retail_arima = forecast(arima(retail_ts))
#b.compare the forecasts with those you obtained in earlier chapters;
retail_hw = hw(retail_ts, seasonal = "multiplicative")
autoplot(retail_hw)

retail_hw_damped = hw(retail_ts, seasonal = "multiplicative", damped = TRUE)
autoplot(retail_hw_damped)

accuracy(retail_arima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -5.227e-13 318.0814 180.0452 -827.383 853.0138 8.89889 0.9852806
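#Note: arima() with no order argument fits a non-seasonal ARIMA(0,0,0) with a mean, and
#retail_ts above stacks every series in aus_retail, which is why the accuracy figures are so
#poor. A sketch of an appropriate seasonal ARIMA for the single series chosen in Exercise 5
#(myseries), using fable and the Box-Cox lambda computed there:
myseries %>%
  model(sarima = ARIMA(box_cox(Turnover, lambda3))) %>%
  forecast(h = "2 years") %>%
  autoplot(myseries) +
  labs(title = "Seasonal ARIMA forecast for the Exercise 5 retail series")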
#15.
#a.
autoplot(pelt, Hare)

#b.
auto.arima(pelt$Hare)
## Series: pelt$Hare
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.3811 -0.7120 -0.5747 45143.349
## s.e. 0.1016 0.0784 0.1240 3242.191
##
## sigma^2 estimated as 583278195: log likelihood=-1046.07
## AIC=2102.15 AICc=2102.85 BIC=2114.7
#c.
acf(pelt)

pacf(pelt)

#d.
#The coefficients given (phi1 = 0.82, phi2 = -0.29, phi3 = -0.01, phi4 = -0.22) define an
#AR(4) model: y_t = c + 0.82 y_{t-1} - 0.29 y_{t-2} - 0.01 y_{t-3} - 0.22 y_{t-4} + e_t.
#Forecasts for 1936, 1937 and 1938 come from plugging the last observed Hare values into
#this equation recursively, using each forecast in place of the still-unknown future values.
#e.
#Note that arima() with no order specified fits ARIMA(0,0,0) with a mean, so the forecasts
#below are flat at the sample mean; compare with the ARIMA(2,0,1) chosen by auto.arima() in
#part b.
pelt_ts <- ts(pelt$Hare)
forecast(arima(pelt_ts))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 92 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 93 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 94 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 95 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 96 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 97 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 98 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 99 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 100 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 101 45406.48 -953.6257 91766.59 -25495.19 116308.2
#16.
Switzerland <- global_economy %>%
filter(Country == "Switzerland")
sw_ts<-ts(Switzerland$Population)
autoplot(sw_ts)

auto.arima(ts(sw_ts))
## Series: ts(sw_ts)
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.8765
## s.e. 0.0962
##
## sigma^2 estimated as 130226144: log likelihood=-602.65
## AIC=1209.3 AICc=1209.53 BIC=1213.35
ggtsdisplay(diff(sw_ts))

acf(sw_ts)

pacf(sw_ts)

swfc <- forecast(auto.arima(sw_ts))
autoplot(swfc)

#17.
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
#b.
autoplot(ibmclose) + ylab("Close Price") + xlab("Year")

ibmA <- auto.arima(ibmclose)
ibmA
## Series: ibmclose
## ARIMA(0,1,0)
##
## sigma^2 estimated as 52.62: log likelihood=-1251.37
## AIC=2504.74 AICc=2504.75 BIC=2508.64
#c.
checkresiduals(ibmA)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 14.151, df = 10, p-value = 0.1662
##
## Model df: 0. Total lags used: 10
#d.
#Note that arima() with no order specified fits ARIMA(0,0,0) (a mean model), so the forecasts
#below are flat at the sample mean rather than at the last observed close (~357), which is
#what the ARIMA(0,1,0) model ibmA fitted above would give.
forecast(arima(ibmclose))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 478.4688 370.6839 586.2538 313.626 643.3117
## 371 478.4688 370.6839 586.2538 313.626 643.3117
## 372 478.4688 370.6839 586.2538 313.626 643.3117
## 373 478.4688 370.6839 586.2538 313.626 643.3117
## 374 478.4688 370.6839 586.2538 313.626 643.3117
## 375 478.4688 370.6839 586.2538 313.626 643.3117
## 376 478.4688 370.6839 586.2538 313.626 643.3117
## 377 478.4688 370.6839 586.2538 313.626 643.3117
## 378 478.4688 370.6839 586.2538 313.626 643.3117
## 379 478.4688 370.6839 586.2538 313.626 643.3117
plot(forecast(arima(ibmclose)))

#e.
ibm2 <- ets(ibmclose, model="ZZZ", lambda="auto")
ibm2
## ETS(A,N,N)
##
## Call:
## ets(y = ibmclose, model = "ZZZ", lambda = "auto")
##
## Box-Cox transformation: lambda= 1.9999
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 109276.6822
##
## sigma: 3191.536
##
## AIC AICc BIC
## 8139.453 8139.518 8151.185
autoplot(ibm2)

#f.
checkresiduals(ibm2)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 16.112, df = 8, p-value = 0.04081
##
## Model df: 2. Total lags used: 10
#g.
ibm2F <- forecast.ets(ibm2)
ibm2F
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 345.3475 368.2831 339.0172 374.1185
## 371 356.9995 340.4051 372.8561 331.2843 380.9830
## 372 356.9995 336.5634 376.3275 325.2259 386.1678
## 373 356.9995 333.2903 379.2294 320.0292 390.4853
## 374 356.9995 330.3797 381.7678 315.3798 394.2500
## 375 356.9995 327.7260 384.0482 311.1167 397.6229
## 376 356.9995 325.2667 386.1334 307.1441 400.6995
## 377 356.9995 322.9607 388.0642 303.3998 403.5421
## 378 356.9995 320.7798 389.8690 299.8405 406.1938
## 379 356.9995 318.7033 391.5683 296.4346 408.6860
autoplot(ibm2F) + ylab("Stock Close") + xlab("Year")

#h.
#The ETS(A,N,N) model is preferred: its forecasts continue from the last observed price,
#whereas the default arima() forecasts revert to the long-run mean.