###################Chapter 8 Exponential Smoothing
#1.Consider the number of pigs slaughtered in Victoria, available in the
#aus_livestock dataset.
View(aus_livestock)
pigs <- aus_livestock %>%
filter(Animal == 'Pigs' & State == 'Victoria')
View(pigs)
#a.Use the ETS() function to estimate the equivalent model for simple
#exponential smoothing. Find the optimal values of α and ℓ0,
#and generate forecasts for the next four months.
ets <- pigs %>%
model(ses = ETS(Count ~ error('A') + trend('N') + season('N')))
optimal_values <- ets %>% report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
#alpha=0.3221247, ℓ0=100646.6
pig_ses<-ets %>% forecast(h = 4)
Forecast <- ets %>%
forecast(h = 4) %>%
autoplot(filter(pigs, Month >= yearmonth('2017 Jan'))) +
labs(title = 'Four Month Forecast Data')
Forecast

#b.Compute a 95% prediction interval for the first forecast using ^y±1.96s where s
#is the standard deviation of the residuals. Compare your interval with the interval produced by R.
yhat<-pig_ses%>%
pull(Count)%>%
head(1)
standardDeviation <- augment(ets) %>%
pull(.resid) %>%
sd()
lower <- yhat - 1.96 * standardDeviation
upper <- yhat + 1.96 * standardDeviation
lower
## <distribution[1]>
## [1] N(76871, 8.7e+07)
upper
## <distribution[1]>
## [1] N(113502, 8.7e+07)
#The 95% prediction interval for the first forecast is from 76871 to 113502.
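#For comparison, R's own 95% interval can be read straight off the fable with hilo()
#(a fabletools helper; a minimal sketch):
pig_ses %>% hilo(level = 95)
#The first row's 95% interval should closely match the manual 76871 to 113502 above.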
#2.Write your own function to implement simple exponential smoothing.
#The function should take arguments y (the time series), alpha (the smoothing parameter)
#and level (the initial levelℓ0).
#It should return the forecast of the next observation in the series.
#Does it give the same forecast as ETS()?
SES <- function(y, alpha, l0){
yhat <- l0
for(index in 1:length(y)){
yhat <- alpha*y[index] + (1 - alpha)*yhat
}
yhat #return the forecast of the next observation
}
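#A quick check of the hand-rolled function against ETS(), using the alpha and l0
#estimated in Question 1 (values taken from the report() output above):
SES(pigs$Count, alpha = 0.3221247, l0 = 100646.6)
#This should come out very close to the first ETS() forecast mean (about 95187).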
#3.Modify your function from the previous exercise to return the sum of squared
#errors rather than the forecast of the next observation. Then use the optim()
#function to find the optimal values ofα and ℓ0.
#Do you get the same values as the ETS() function?
vic_pig <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria") %>%
pull(Count)
ses2 <- function(par, y)
{
alpha <- par[1]
level <- par[2]
n <- length(y)
yhat <- numeric(n)
yhat[1] <- level
for(i in 2:n)
{
yhat[i] <- alpha * y[i-1] + (1 - alpha) * yhat[i-1]
}
return(sum((y - yhat)^2) %>%
round(3)
)
}
optParam <- optim(c(0.00001, vic_pig[1]), ses2, y=vic_pig)$par
optParam
## [1] 3.236967e-01 9.422438e+04
#α=3.236967e-01 and ℓ0=9.422438e+04
#4.Combine your previous two functions to produce a function that both
#finds the optimal values of α and ℓ0, and produces a forecast of the next observation in the series.
SES <- function(init_pars, data){
fc_next <- 0
SSE <- function(pars, data){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(data)){
error <- data[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*data[index] + (1 - alpha)*y_hat
}
fc_next <<- y_hat
return(SSE)
}
optim_pars <- optim(par = init_pars, data = data, fn = SSE)
return(list(
Next_observation_forecast = fc_next,
alpha = optim_pars$par[1],
l0 = optim_pars$par[2]
))
}
SES(c(0.5, vic_pig[1]), vic_pig)
## $Next_observation_forecast
## [1] 95186.55
##
## $alpha
## [1] 0.3221274
##
## $l0
## [1] 99223.08
#α=0.3221274 and ℓ0=99223.08
#5.Data set global_economy contains the annual Exports from many countries.
#Select one country to analyse.
View(global_economy)
vietnam<-global_economy%>%
filter(Country=='Vietnam')
View(vietnam)
vietnam_train<- vietnam %>%
filter(Year<="2012")
View(vietnam_train)
#a.Plot the Exports series and discuss the main features of the data.
vietnam %>%
autoplot(Exports)+
labs(y="Exports",title="Vietnam Exports")
## Warning: Removed 26 row(s) containing missing values (geom_path).

#b.Use an ETS(A,N,N) model to forecast the series, and plot the forecasts
est_ann<- vietnam_train$Exports %>%
ets(model="ANN") %>%
forecast(h=5)
## Warning in ets(., model = "ANN"): Missing values encountered. Using longest
## contiguous portion of time series
est_ann
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 54 80.02855 71.81060 88.24651 67.46028 92.59683
## 55 80.02855 68.40719 91.64991 62.25522 97.80189
## 56 80.02855 65.79559 94.26152 58.26112 101.79599
## 57 80.02855 63.59388 96.46323 54.89389 105.16322
## 58 80.02855 61.65412 98.40299 51.92729 108.12982
est_ann %>% autoplot()

#c.Compute the RMSE values for the training data
sqrt(mean(est_ann$residuals^2))
## [1] 6.170434
#RMSE=6.170434
#d.Compare the results to those from an ETS(A,A,N) model. Discuss the merits of
#the two forecasting methods for this data set
est_aan<- vietnam_train$Exports %>%
ets(model="AAN") %>%
forecast(h=5)
## Warning in ets(., model = "AAN"): Missing values encountered. Using longest
## contiguous portion of time series
est_aan
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 54 79.56347 72.73426 86.39269 69.11909 90.00786
## 55 81.34833 74.51911 88.17755 70.90394 91.79272
## 56 83.08652 76.25730 89.91574 72.64213 93.53091
## 57 84.77926 77.95004 91.60848 74.33487 95.22365
## 58 86.42774 79.59852 93.25696 75.98335 96.87213
est_aan %>% autoplot()

#RMSE value for the ETS(A,A,N) model
sqrt(mean(est_aan$residuals^2))
#e.Compare the forecasts from both methods. Which do you think is best?
#The ETS(A,A,N) forecasts follow the upward trend of the exports series, while the
#ETS(A,N,N) forecasts are flat at the last level. Hence, I think AAN is better for this data.
#f.Calculate a 95% prediction interval for the first forecast for each model, using
#the RMSE values and assuming normal errors. Compare your intervals with those
#produced using R
#ANN
s1 <- sd(est_ann$residuals)
est_ann$mean[1] - 1.96 * s1 #lower 95%=68.96548
## [1] 68.96548
est_ann$mean[1] + 1.96 * s1 #upper 95%=91.09163
## [1] 91.09163
#AAN
s2 <- sd(est_aan$residuals)
est_aan$mean[1] - 1.96 * s2 #lower 95%=69.9559
## [1] 69.9559
est_aan$mean[1] + 1.96 * s2 #upper 95%=89.17105
## [1] 89.17105
#6.Forecast the Chinese GDP from the global_economy data set using an ETS model.
#Experiment with the various options in the ETS() function to see how much the
#forecasts change with damped trend, or with a Box-Cox transformation.
#Try to develop an intuition of what each is doing to the forecasts.
china<-global_economy %>%
filter(Country == 'China')
china %>%
autoplot(GDP) +
labs(y = "GDP", title = "Chinese GDP")

#generate forecasts for the next 5 years
china_holt <- china$GDP %>%
holt(h = 5)
china_holt %>% autoplot()
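#A sketch of the experiments the exercise asks for (fable syntax, consistent with the rest
#of this script): a plain ETS, a damped-trend ETS, and an ETS fitted to Box-Cox transformed
#GDP, with lambda chosen by the guerrero feature.
lambda_china <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
china %>%
  model(
    ets        = ETS(GDP),
    ets_damped = ETS(GDP ~ trend("Ad")),
    ets_boxcox = ETS(box_cox(GDP, lambda_china))
  ) %>%
  forecast(h = "20 years") %>%
  autoplot(china, level = NULL) +
  labs(title = "Chinese GDP: ETS, damped ETS and Box-Cox ETS forecasts")
#Typically the damped trend flattens the long-run growth, while the (log-like) Box-Cox
#transformation lets the trend grow multiplicatively on the original scale.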

#7.Find an ETS model for the Gas data from aus_production and forecast the next few years.
#Why is multiplicative seasonality necessary here? Experiment with making the trend damped.
#Does it improve the forecasts?
aus_production %>%
autoplot(Gas)

ts <- ts(data=aus_production$Gas, frequency=4)
gas <- ets(ts, model="MAM")
#forecast the next 5 years
fc_gas <- gas %>% forecast(h=20)
fc_gas %>% autoplot()

gas
## ETS(M,A,M)
##
## Call:
## ets(y = ts, model = "MAM")
##
## Smoothing parameters:
## alpha = 0.6529
## beta = 0.1442
## gamma = 0.0978
##
## Initial states:
## l = 5.9456
## b = 0.0706
## s = 0.9309 1.1779 1.0749 0.8163
##
## sigma: 0.057
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
#alpha=0.6529 beta=0.1442
#AIC:1680.929 AICc:1681.794 BIC:1711.389
#Multiplicative seasonality is necessary because the seasonal variation increases in
#proportion to the level of the series.
#trend damped
gas_damped <- ets(ts, model="MAM", damped=TRUE)
fc_gas_damped <- gas_damped %>% forecast(h=20)
fc_gas_damped %>% autoplot()

gas_damped
## ETS(M,Ad,M)
##
## Call:
## ets(y = ts, model = "MAM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.6489
## beta = 0.1551
## gamma = 0.0937
## phi = 0.98
##
## Initial states:
## l = 5.8589
## b = 0.0994
## s = 0.9282 1.1779 1.0768 0.8171
##
## sigma: 0.0573
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
#alpha=0.6489 beta=0.1551
#AIC:1684.028 AICc:1685.091 BIC:1717.873
#Comparing AICc and BIC (both slightly higher for the damped model), we cannot conclude that
#damping the trend improves the model.
#8.Recall your retail time series data
#a.Why is multiplicative seasonality necessary for this series?
set.seed(333)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
ts <- ts(data=myseries$Turnover, frequency=12)
ts%>% autoplot()

retail_hw <- ets(ts, model="MAM")
#forecast the next 5 years
fc_retail_hw <- retail_hw %>% forecast(h=60)
fc_retail_hw %>% autoplot()

#b.Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
retail_hw_damped <- ets(ts, model="MAM", damped=TRUE)
fc_retail_hw_damped <- retail_hw_damped %>% forecast(h=60)
fc_retail_hw_damped %>% autoplot()

#c.Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
sqrt(mean(fc_retail_hw$residuals^2))
## [1] 0.09928151
#RMSE=0.09928151
sqrt(mean(fc_retail_hw_damped$residuals^2))
## [1] 0.1008364
#RMSE=0.1008364
#The non-damped model is preferred since it has a lower RMSE.
#d.Check that the residuals from the best method look like white noise.
checkresiduals(retail_hw)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,M)
## Q* = 81.106, df = 8, p-value = 2.931e-14
##
## Model df: 16. Total lags used: 24
#The Ljung-Box test strongly rejects (p-value = 2.9e-14), so the residuals do not look like
#white noise: significant autocorrelation remains.
#e.Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
train <- myseries %>%
filter(year(Month) <= 2010)
view(train)
test <- myseries %>%
filter(year(Month) > 2010)
ts_train <- ts(data=train$Turnover, frequency=12)
ts_test <- ts(data=test$Turnover, frequency=12)
retail_hw_train <- ets(ts_train, model="MAM")
sqrt(mean(retail_hw_train$residuals^2))
## [1] 0.09307003
#RMSE=0.09307003
retail_hw_test <- ets(ts_test, model="MAM")
sqrt(mean(retail_hw_test$residuals^2))
## [1] 0.08166493
#RMSE=0.08166493
fc <- retail_hw_train %>%
forecast(new_data = anti_join(ts, ts_train))
retail_hw_train %>% accuracy()
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03733977 0.6889633 0.5020404 -0.9923539 7.02562 0.4540452
## ACF1
## Training set 0.035107
fc %>% accuracy(ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03733977 0.6889633 0.5020404 -0.9923539 7.02562 0.4540452
## Test set -3.82432232 3.9775350 3.8243223 -74.3310267 74.33103 3.4587163
## ACF1 Theil's U
## Training set 0.0351070 NA
## Test set 0.1770195 2.424308
#Based on these results, accuracy on the training set is much better than on the test set.
#Even so, the Holt-Winters model appears to outperform the seasonal naïve model from
#Exercise 7 in Section 5.11.
#9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series,
#followed by ETS on the seasonally adjusted data.
#How does that compare with your best previous forecasts on the test set?
ts_stl <- stl(ts_train, s.window='periodic')
ts_stl %>% autoplot()

ts_bc_ets<- ts_train %>%
ets(lambda = BoxCox.lambda(ts_train)) %>%
forecast(h=10)
ts_bc_ets %>% autoplot()

fc <- ts_bc_ets %>%
forecast(new_data = anti_join(ts, ts_train))
ts_bc_ets %>% accuracy()
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03998483 0.6803232 0.4939701 0.1571171 6.864127 0.4467464
## ACF1
## Training set 0.03852586
#Comparing training RMSE (0.680 here vs 0.689 for the earlier ETS(M,A,M)), the Box-Cox ETS
#fit is slightly better than the previous model.
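#What the exercise literally asks for can also be done in the forecast-package workflow used
#above: STL on the Box-Cox transformed training series, ETS on the seasonally adjusted data,
#then forecasts over the test period (a sketch; stlm() handles the STL + ETS combination).
stl_ets_fc <- stlm(ts_train, s.window = "periodic",
                   lambda = BoxCox.lambda(ts_train),
                   method = "ets") %>%
  forecast(h = length(ts_test))
accuracy(stl_ets_fc, test$Turnover)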
#10.Compute the total domestic overnight trips across Australia from the tourism dataset.
View(tourism)
#a.Plot the data and describe the main features of the series.
aus_trip<-tourism %>%
summarise(Trips = sum(Trips))
aus_trip %>% autoplot(Trips) +
labs(title = "Australian Overnight Trips")

#The plot shows a decreasing trend from about 2000 to 2010, after which the series rises
#again; there is also clear quarterly seasonality.
#b.Decompose the series using STL and obtain the seasonally adjusted data.
aus_trip_stl <- aus_trip %>%
model(STL(Trips)) %>%
components()
aus_trip_stl %>% autoplot()

aus_trip_stl %>% as_tsibble() %>%
autoplot(season_adjust) +
labs(title = "Australian Overnight Trips (STL)")

#c.Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be specified using decomposition_model().)
aus_trip %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))) %>%
forecast(h = "2 years") %>% autoplot(aus_trip) +
labs(title = "Australian Overnight Trips with Additive Damped Trend Forecast")

#d.Forecast the next two years of the series using an appropriate model for Holt’s linear method applied to the seasonally adjusted data (as before but without damped trend).
aus_trip %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("A") + season("N")))) %>%
forecast(h = "2 years") %>%
autoplot(aus_trip) +
labs(title = "Australian Overnight Trips with Holt's Linear Method Forecast")

#e.Now use ETS() to choose a seasonal model for the data.
aus_trip %>%
model(ETS(Trips)) %>%
forecast(h = "2 years") %>%
autoplot(aus_trip) +
labs(title = "Australian Overnight Trips (ETS)")

#f.Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
compare_stl <- aus_trip %>%
model(
stl_AAdn = decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))),
stl_AAN = decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") +
trend("A") +
season("N"))),
ETS = ETS(Trips)
)
accuracy(compare_stl)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl_AAdn Training 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
## 2 stl_AAN Training 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
## 3 ETS Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
#Comparing ME and RMSE, stl_AAN gives slightly better in-sample fits, and both STL-based
#models beat the plain ETS.
#g.Compare the forecasts from the three approaches? Which seems most reasonable?
compare_stl %>%
forecast(h = "2 years") %>%
autoplot(aus_trip, level = NULL) +
labs(title = "Australian Overnight Trips: Comparing Forecasts") +
guides(colour=guide_legend(title="Types of Forecast"))

#Based on the plot, the stl_AAN forecasts sit between the ETS and stl_AAdn forecasts,
#so stl_AAN seems the most reasonable choice.
#h.Check the residuals of your preferred model.
resid_aan <- compare_stl %>%
select(stl_AAN)
resid_aan %>%gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

#11.For this exercise use the quarterly number of arrivals to Australia from New Zealand, 1981 Q1 – 2012 Q3, from data set aus_arrivals.
View(aus_arrivals)
nz<-aus_arrivals%>%
filter(Origin=='NZ')
View(nz)
#a.Make a time plot of your data and describe the main features of the series.
autoplot(nz,Arrivals)+
labs(title = "New Zealand Arrivals in Australia")

#b.Create a training set that withholds the last two years of available data. Forecast the test set using an appropriate model for Holt-Winters’ multiplicative method.
train<-nz %>%
filter(Quarter<yearquarter('2011 Q1'))
View(train)
train %>%
model(
ETS(Arrivals ~ error("M") +
trend("A") +
season("M"))) %>%
forecast(h = 8) %>%
autoplot(level=NULL,colour = "red") +
autolayer(nz, Arrivals) +
labs(title = "Train vs. Actual New Zealand Arrivals in Australia")

#c.Why is multiplicative seasonality necessary here?
#Because the seasonal variation increases with the level of the series, so additive
#seasonality would not capture it.
#d.Forecast the two-year test set using each of the following methods:
##an ETS model;
##an additive ETS model applied to a log transformed series;
##a seasonal naïve method;
##an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
compare_models <- anti_join(nz,train,
by = c("Quarter",
"Origin",
"Arrivals"))
arrivals_models <- train %>%
model(
ETS_mod = ETS(Arrivals),
Log_mod = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
SNAIVE_mod = SNAIVE(Arrivals),
STL_decomp_log = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust)))
arrivals_f <- arrivals_models %>% forecast(h = 8)
arrivals_f %>% autoplot(level = NULL) +
autolayer(compare_models, Arrivals) +
labs(title = "Train vs. Actual New Zealand Arrivals in Australia") +
guides(colour = guide_legend(title = "Forecast"))

#e.Which method gives the best forecasts? Does it pass the residual tests?
#ETS_mod gives the best forecasts.
aus_optimal <- arrivals_models %>% select('ETS_mod')
aus_optimal %>% gg_tsresiduals()

#f.Compare the same four methods using time series cross-validation instead of using a training and test set. Do you come to the same conclusions?
arrivals_models_crossval <- nz %>%
slice(1:(n()-3)) %>%
stretch_tsibble(.init = 36, .step = 3)
arrivals_models_crossval %>%
model(
ETS_mod = ETS(Arrivals),
Add_Log = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
SNAIVE_mod = SNAIVE(Arrivals),
STL_decomp_log = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust))
) %>%
forecast(h = 3) %>%
accuracy(nz)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Add_Log NZ Test -375. 14347. 11021. 0.200 5.83 0.741 0.746 0.309
## 2 ETS_mod NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 3 SNAIVE_mod NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 STL_decomp_log NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
#The Add_Log is the best model with the lowest RMSE value.
#13.Compare ETS(), SNAIVE() and decomposition_model(STL, ???) on the following five time series.
#You might need to use a Box-Cox transformation for the STL decomposition forecasts.
#Use a test set of three years to decide what gives the best forecasts.
##Beer and bricks production from aus_production.
aus_production %>% autoplot(Beer) +
labs(title = "Australian Beer Production")

beer_f <- aus_production %>%
slice(1:(nrow(aus_production)-12)) %>%
model(
ETS_beer = ETS(Beer),
SNAIVE_beer = SNAIVE(Beer),
STL_Decomp_beer = decomposition_model(STL(log(Beer)),
ETS(season_adjust))
) %>% forecast(h = "3 years")
beer_f %>%
autoplot(filter_index(aus_production, "2000 Q1" ~ .), level = NULL) +
labs(title = "Australian Beer Production")

beer_f %>% accuracy(aus_production)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS_beer Test 0.127 9.62 8.92 0.00998 2.13 0.563 0.489 0.376
## 2 SNAIVE_beer Test -2.92 10.8 9.75 -0.651 2.34 0.616 0.549 0.325
## 3 STL_Decomp_beer Test -2.85 9.87 8.95 -0.719 2.16 0.565 0.502 0.283
aus_bricks <- aus_production %>% filter(!is.na(Bricks)) #drop quarters with missing Bricks
aus_bricks %>% autoplot(Bricks) +
labs(title = "Australian Quarterly Brick Production")

bricks_f <- aus_bricks %>% slice(1:(nrow(aus_bricks)-12)) %>%
model(
ETS = ETS(Bricks),
SNAIVE = SNAIVE(Bricks),
STL_Decomp = decomposition_model(STL(log(Bricks)),
ETS(season_adjust))) %>%
forecast(h = "3 years")
bricks_f %>%
autoplot(filter_index(aus_bricks, "1980 Q1" ~ .), level = NULL) +
labs(title = "Australian Quarterly Brick Production")

bricks_f %>% accuracy(aus_bricks)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 2.27 17.5 13.2 0.474 3.31 0.365 0.354 0.339
## 2 SNAIVE Test 32.6 36.5 32.6 7.85 7.85 0.898 0.739 -0.322
## 3 STL_Decomp Test 9.71 18.7 14.9 2.29 3.65 0.411 0.378 0.0933
##Cost of drug subsidies for diabetes (ATC2 == "A10") and corticosteroids (ATC2 == "H02") from PBS.
drug_subsidies <- PBS %>%
filter(ATC2 %in% c("A10", "H02")) %>%
group_by(ATC2) %>%
summarise(Cost = sum(Cost))
drug_subsidies %>%
autoplot(Cost) + facet_wrap(vars(ATC2), scales = "free_y") +
labs(title = "Diabetes & Corticosteroids Drug Subsidy Costs")

cortico <- drug_subsidies %>%
filter(ATC2 %in% 'H02')
cortico_cost <- cortico %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
cortico_f <-cortico %>% filter(Month < max(Month) - 35) %>%
model(
ETS = ETS(Cost),
SNAIVE = SNAIVE(Cost),
STL_Decomp = decomposition_model(STL(box_cox(log(Cost), cortico_cost)), ETS(season_adjust))
) %>%
forecast(h = "3 years")
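#A sketch: plot and score the corticosteroid forecasts the same way as the beer and bricks
#series above (the held-out last three years of cortico serve as the test set).
cortico_f %>%
  autoplot(filter_index(cortico, "2005 Jan" ~ .), level = NULL) +
  labs(title = "Corticosteroid (H02) Subsidy Cost Forecast Comparisons")
cortico_f %>% accuracy(cortico)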
##Total food retailing turnover for Australia from aus_retail.
View(aus_retail)
aus_food <- aus_retail %>%
filter(Industry=="Food retailing") %>%
summarise(Turnover=sum(Turnover))
aus_food %>%
autoplot(Turnover) +
labs(title = "Australian Food Turnover")

food_turnover <- aus_food %>% features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
aus_food_f <- aus_food %>%
filter(Month < max(Month) - 35) %>%
model(ETS = ETS(Turnover),
SNAIVE = SNAIVE(Turnover),
STL_Decomp = decomposition_model(STL(box_cox(log(Turnover),
food_turnover)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
aus_food_f %>% autoplot(filter_index(aus_food, "2005 Jan" ~ .), level = NULL) +
labs(title = "Australian Food Turnover Forecast Comparisons")

aus_food_f %>% accuracy(aus_food)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test -151. 194. 170. -1.47 1.65 0.639 0.634 0.109
## 2 SNAIVE Test 625. 699. 625. 5.86 5.86 2.35 2.29 0.736
## 3 STL_Decomp Test -8.34 155. 132. -0.135 1.24 0.496 0.506 0.256
#14.a.Use ETS() to select an appropriate model for the following series: total number of trips across Australia using tourism, the closing prices for the four stocks in gafa_stock, and the lynx series in pelt. Does it always give good forecasts?
aus_tourism <- tourism %>%
summarise(Trips = sum(Trips))
tourism_trips <- aus_tourism %>%
model(ETS(Trips))
tourism_trips %>%
forecast() %>%
autoplot(aus_tourism) +
labs(title = "Australian Tourism Forecast")

gafa_stock %>%
autoplot(Close) +
facet_grid(vars(Symbol), scales = "free_y") +
labs(title = "Closing Stock Prices")

gafa_avg <- gafa_stock %>%
group_by(Symbol) %>%
mutate(trading_day = row_number()) %>%
ungroup() %>%
as_tsibble(index = trading_day, regular = TRUE)
gafa_fc <- gafa_avg %>%
model(
ETS(Close)
)
gafa_fc %>%
forecast(h=50) %>%
autoplot(gafa_avg %>% group_by_key() %>% slice((n() - 100):n())) +
labs(title = "Forecasted Closing Stock Prices")
## `mutate_if()` ignored the following grouping variables:
## • Column `Symbol`

#b.Find an example where it does not work well. Can you figure out why?
pelt %>%
model(ETS(Lynx))
## # A mable: 1 x 1
## `ETS(Lynx)`
## <model>
## 1 <ETS(A,N,N)>
pelt %>%
model(ETS(Lynx)) %>% forecast(h = 10) %>%
autoplot(pelt) +
labs(title = "Forecasted Pelt Trades")

#ETS() does not always give good forecasts: for the pelt data it selects ETS(A,N,N), whose
#flat forecasts give no indication of the strong cyclic behaviour of future pelt trades.
#15.Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.
usdeaths.ets2 <- ets(usdeaths,model = "MAM")
usdeaths.ets2fc <- forecast(usdeaths.ets2, h=1)
usdeaths.ets2fc$mean
## Jan
## 1979 8233.107
usdeaths.hwM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.hwM$mean
## Jan
## 1979 8217.64
#The point forecasts from an ETS(M,A,M) model use exactly Holt-Winters' multiplicative
#equations, so with identical parameter values they coincide; the small gap here (8233 vs
#8218) only reflects the two functions arriving at slightly different parameter estimates.
#16.Show that the forecast variance for an ETS(A,N,N) model is given by σ^2[1+α^2(h−1)]
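#A short derivation (sketch): for ETS(A,N,N), y_{T+h} = l_{T+h-1} + e_{T+h} and
#l_t = l_{t-1} + alpha*e_t, so
#   y_{T+h} = l_T + alpha*(e_{T+1} + ... + e_{T+h-1}) + e_{T+h}.
#With independent errors of variance sigma^2, the forecast variance is
#   Var(y_{T+h} | l_T) = sigma^2 * (1 + alpha^2 * (h - 1)).
#The code below checks this numerically for h = 1 against the interval reported by R.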
ibm_ets <- ets(ibmclose, model = "ANN")
ibm_ets_fc <- forecast(ibm_ets, 1)
ibm_ets_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 347.6907 366.3083 342.7629 371.2361
ibm_sig <- 7.2637    #sigma (residual sd) from the fitted ETS(A,N,N) model
alpha <- 0.9999      #smoothing parameter from the fitted model
h <- 1
conf_int <- 1.96 * ibm_sig * sqrt(1 + alpha^2 * (h - 1))
ibm_ets_fc$mean[1]
## [1] 356.9995
#356.9995
ibm_ets_fc$mean[1] - conf_int
## [1] 342.7626
#342.7626, which matches R's 95% lower bound below up to rounding of sigma
ibm_ets_fc$lower[1, '95%']
## 95%
## 342.7629
#342.7629
#17.Write down 95% prediction intervals for an ETS(A,N,N) model
#as a function of ℓT,α,h and σ, assuming normally distributed errors.
#yhat_{T+h|T} = l_T, so the 95% interval is l_T ± 1.96 * sigma * sqrt(1 + alpha^2 * (h - 1))
###################Chapter 9 ARIMA Models
#1a.All figures show white noise, but with different critical value bounds.
#b.The critical values are at different distances from zero because the data sets have
#different numbers of observations: the bounds are roughly ±1.96/sqrt(T), so the more
#observations, the narrower the bounds and the smaller the sample autocorrelations appear.
#2.A classic example of a non-stationary series is stock prices. Plot the daily
#closing prices for Amazon stock (contained in gafa_stock),along with the ACF and PACF.
#Explain how each plot shows that the series is non-stationary and should be differenced.
gafa_stock %>%
filter(Symbol=="AMZN")%>%
autoplot(Close)+labs(y="$", title="Amazon CLOSE")

gafa_stock %>%
filter(Symbol=="AMZN")%>%
ACF(Close)%>%autoplot()
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

#The ACF decays very slowly and every lag shows a correlation significantly different from
#zero, with no seasonal pattern: the series is non-stationary and should be differenced.
gafa_stock %>%
filter(Symbol=="AMZN")%>%
PACF(Close)%>%autoplot()
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

#The PACF also shows a very large spike at lag 1 (close to 1), again pointing to
#non-stationarity; the series should be differenced.
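#A sketch: after one difference the series looks much closer to white noise, which is what
#the ACF/PACF above are pointing to.
gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  mutate(diff_close = difference(Close)) %>%
  autoplot(diff_close) +
  labs(y = "$", title = "Daily change in Amazon closing price")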
#3.For the following series, find an appropriate Box-Cox transformation and order
#of differencing in order to obtain stationary data.
#a.Turkish GDP from global_economy.
Turkey<-global_economy %>%
filter(Country=="Turkey")
Turkey%>%
autoplot(GDP)+labs(y="$", title="TURKEY GDP")

lambda<-Turkey%>%
features(GDP,features=guerrero)%>%
pull(lambda_guerrero)
lambda
## [1] 0.1572187
Turkey%>% mutate(newG=GDP^lambda)
## # A tsibble: 58 x 10 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population newG
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Turkey TUR 1960 1.40e10 NA 5.40e-5 3.67 2.06 27472331 39.4
## 2 Turkey TUR 1961 8.02e 9 1.16 5.57e-5 6.79 5.12 28146893 36.1
## 3 Turkey TUR 1962 8.92e 9 5.57 5.78e-5 7.97 5.60 28832805 36.7
## 4 Turkey TUR 1963 1.04e10 9.07 6.15e-5 6.97 4.18 29531342 37.5
## 5 Turkey TUR 1964 1.12e10 5.46 6.22e-5 5.47 4.47 30244232 38.0
## 6 Turkey TUR 1965 1.19e10 2.82 6.50e-5 5.40 4.56 30972965 38.4
## 7 Turkey TUR 1966 1.41e10 11.2 7.05e-5 5.66 4.09 31717477 39.4
## 8 Turkey TUR 1967 1.57e10 4.73 8.04e-5 4.96 4.11 32477961 40.1
## 9 Turkey TUR 1968 1.75e10 6.78 8.53e-5 5.08 3.68 33256432 40.8
## 10 Turkey TUR 1969 1.95e10 4.08 8.95e-5 4.74 3.60 34055361 41.5
## # … with 48 more rows
Turkey%>%
autoplot(box_cox(GDP, lambda))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed GDP with $$\\lambda$ =",
round(lambda,2))))

Turkey<-Turkey%>% mutate(newG=GDP^lambda)
Turkey%>%autoplot(newG)

Turkey%>% features(newG,ljung_box, lag=10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 341. 0
Turkey %>%
mutate(diff_close = difference(newG)) %>%
features(diff_close, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Turkey 5.84 0.829
Turkey%>%
ACF(difference(newG))%>%autoplot()

Turkey%>%
PACF(difference(newG))%>%
autoplot()

#After one difference of the transformed series, the ACF and PACF spikes are essentially all
#within the critical bounds, so a single first difference is sufficient here.
#b.Accommodation takings in the state of Tasmania from aus_accommodation.
Tasmania<-aus_accommodation %>%
filter(State=="Tasmania")
Tasmania%>%
autoplot(Takings)+labs(y="$", title="Tasmania Accommodation Takings")

lambda1<-Tasmania%>%
features(Takings,features=guerrero)%>%
pull(lambda_guerrero)
Tasmania<-Tasmania%>% mutate(newT=Takings^lambda1)
Tasmania%>%
autoplot(box_cox(Takings, lambda1))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Takings with $$\\lambda1$ =",
round(lambda1,2))))

Tasmania%>%autoplot(newT)

Tasmania%>% features(newT,unitroot_kpss)
## # A tibble: 1 × 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 1.83 0.01
#1st diff
Tasmania %>%
mutate(diff_Taking = difference(newT))%>%
features(diff_Taking, unitroot_kpss)
## # A tibble: 1 × 3
## State kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 Tasmania 0.256 0.1
Tasmania<-Tasmania %>%
mutate(diff_Taking = difference(newT))
Tasmania %>%
features(diff_Taking, unitroot_ndiffs)
## # A tibble: 1 × 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 0
Tasmania%>%
ACF((diff_Taking))%>%autoplot()

Tasmania%>%
PACF((diff_Taking))%>%
autoplot()

Tasmania %>%
features(diff_Taking, unitroot_nsdiffs)
## # A tibble: 1 × 2
## State nsdiffs
## <chr> <int>
## 1 Tasmania 1
#suggests 1 seasonal diff
Tasmania<-Tasmania %>%
mutate(diff_Taking2 = difference(diff_Taking,4)) #quarterly data, so the seasonal lag is 4
Tasmania%>%
ACF((diff_Taking2))%>%
autoplot()

Tasmania%>%
PACF((diff_Taking2))%>%
autoplot()

Tasmania%>% autoplot(diff_Taking2)
## Warning: Removed 5 row(s) containing missing values (geom_path).

#c.Monthly sales from souvenirs.
souvenirs %>%
autoplot(Sales)+labs(y="$", title="Souvenir sales")

lambda2<-souvenirs%>%
features(Sales,features=guerrero)%>%
pull(lambda_guerrero)
SouvSales<-souvenirs%>% mutate(newS=Sales^lambda2)
SouvSales%>%
autoplot(box_cox(Sales, lambda2))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Sales with $$\\lambda2$ =",
round(lambda2,2))))

SouvSales%>%autoplot(newS)

SouvSales%>% features(newS,unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.79 0.01
SouvSales %>%
mutate(diff_Sales = difference(newS))%>%
features(diff_Sales, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0631 0.1
#1st diff
SouvSales <-SouvSales %>%
mutate(diff_Sales = difference(newS))
SouvSales %>%
features(diff_Sales, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 0
SouvSales%>%
ACF((diff_Sales))%>%autoplot()

SouvSales%>%
PACF((diff_Sales))%>%
autoplot()

SouvSales %>%
features(diff_Sales, unitroot_nsdiffs)
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
#1 seasonal diff
SouvSales<-SouvSales %>%
mutate(diff_Sales2 = difference(diff_Sales,12))
SouvSales%>%
ACF((diff_Sales2))%>%
autoplot()

SouvSales%>%
PACF((diff_Sales2))%>%
autoplot()

SouvSales%>% autoplot(diff_Sales2)
## Warning: Removed 13 row(s) containing missing values (geom_path).

#4.For the souvenirs data, write down the differences you chose above using backshift operator notation.
#The Sales series was Box-Cox transformed, then first differenced and seasonally differenced
#at lag 12. In backshift notation the differencing is (1 - B)(1 - B^12) w_t, where w_t is the
#Box-Cox transformed series. The result is much closer to stationary, although a few ACF/PACF
#spikes still exceed the bounds, so a seasonal ARIMA model would still be needed to capture them.
#5.For your retail data (from Exercise 8 in Section 2.10), find the appropriate
#order of differencing (after transformation if necessary) to obtain stationary data.
aus_retail %>% distinct(Industry)
## # A tibble: 20 × 1
## Industry
## <chr>
## 1 Cafes, restaurants and catering services
## 2 Cafes, restaurants and takeaway food services
## 3 Clothing retailing
## 4 Clothing, footwear and personal accessory retailing
## 5 Department stores
## 6 Electrical and electronic goods retailing
## 7 Food retailing
## 8 Footwear and other personal accessory retailing
## 9 Furniture, floor coverings, houseware and textile goods retailing
## 10 Hardware, building and garden supplies retailing
## 11 Household goods retailing
## 12 Liquor retailing
## 13 Newspaper and book retailing
## 14 Other recreational goods retailing
## 15 Other retailing
## 16 Other retailing n.e.c.
## 17 Other specialised food retailing
## 18 Pharmaceutical, cosmetic and toiletry goods retailing
## 19 Supermarket and grocery stores
## 20 Takeaway food services
set.seed(1111111)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% distinct(Industry)
## # A tibble: 1 × 1
## Industry
## <chr>
## 1 Newspaper and book retailing
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A 1982 Apr 9.7
## 2 Western Australia Newspaper and book retailing A3349822A 1982 May 11
## 3 Western Australia Newspaper and book retailing A3349822A 1982 Jun 10.7
## 4 Western Australia Newspaper and book retailing A3349822A 1982 Jul 9
## 5 Western Australia Newspaper and book retailing A3349822A 1982 Aug 9.1
## 6 Western Australia Newspaper and book retailing A3349822A 1982 Sep 10
str(myseries)
## tbl_ts [441 × 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ State : chr [1:441] "Western Australia" "Western Australia" "Western Australia" "Western Australia" ...
## $ Industry : chr [1:441] "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" ...
## $ Series ID: chr [1:441] "A3349822A" "A3349822A" "A3349822A" "A3349822A" ...
## $ Month : mth [1:441] 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 Sep...
## $ Turnover : num [1:441] 9.7 11 10.7 9 9.1 10 7.7 8.4 11.8 7.4 ...
## - attr(*, "key")= tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
## ..$ State : chr "Western Australia"
## ..$ Industry: chr "Newspaper and book retailing"
## ..$ .rows : list<int> [1:1]
## .. ..$ : int [1:441] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "index")= chr "Month"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Month"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
autoplot(myseries,.vars=Turnover)

gg_season(myseries,y=Turnover)

gg_subseries(myseries,y=Turnover)

gg_lag(myseries,y=Turnover)

lambda3<-myseries%>%
features(Turnover,features=guerrero)%>%
pull(lambda_guerrero)
myseries<-myseries%>% mutate(newT=Turnover^lambda3)
myseries%>%
autoplot(box_cox(Turnover, lambda3))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Turnover with $$\\lambda3$ =",
round(lambda3,2))))

myseries%>%autoplot(newT)

myseries%>% features(newT,unitroot_kpss)
## # A tibble: 1 × 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 Western Australia Newspaper and book retailing 6.05 0.01
myseries %>%
mutate(diff_Turn = difference(newT))%>%
features(diff_Turn, unitroot_kpss)
## # A tibble: 1 × 4
## State Industry kpss_stat kpss_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 Western Australia Newspaper and book retailing 0.0187 0.1
#1st diff
myseries <-myseries %>%
mutate(diff_Turn = difference(newT))
myseries %>%
features(diff_Turn, unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Western Australia Newspaper and book retailing 0
#ACF and PACF
myseries%>%
ACF((diff_Turn))%>%autoplot()

myseries%>%
PACF((diff_Turn))%>%
autoplot()

myseries %>%
features(diff_Turn, unitroot_nsdiffs)
## # A tibble: 1 × 3
## State Industry nsdiffs
## <chr> <chr> <int>
## 1 Western Australia Newspaper and book retailing 1
#suggests taking 1 seasonal difference
myseries<-myseries %>%
mutate(diff_Turn2 = difference(diff_Turn,12))
myseries%>%
ACF((diff_Turn2))%>%
autoplot()

myseries%>%
PACF((diff_Turn2))%>%
autoplot()

myseries%>% autoplot(diff_Turn2)
## Warning: Removed 13 row(s) containing missing values (geom_path).

#6.Simulate and plot some data from simple ARIMA models.
#a.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
#b.
sim%>%
autoplot(y)

set.seed(6543221)
y2 <- ts(numeric(100))
y3 <- ts(numeric(100))
e2 <- rnorm(100)
e3 <- rnorm(100)
for(i in 2:100){
y2[i] <- 0.1*y2[i-1] + e2[i]
y3[i] <- 0.9*y3[i-1] + e3[i]
}
sim <- tsibble(idx = seq_len(100), y = y, y2 = y2, y3 = y3, index = idx)
plot1 <- sim %>% autoplot(y) + labs( title = "phi = 0.6")
plot2 <- sim %>% autoplot(y2) + labs( title = "phi = 0.1")
plot3 <- sim %>% autoplot(y3) + labs( title = "phi = 0.9")
plot1

plot2

plot3

#c.
set.seed(12345566)
y4 <- numeric(100)
e4 <- rnorm(100)
for(i in 2:100)
y4[i] <- 0.6*e4[i-1] + e4[i]
sim <- tsibble(idx = seq_len(100), y4 = y4, index = idx)
#d.
sim %>% autoplot(y4) + labs(title="MA(1) 0.6 ")

set.seed(55667788)
y5 <- ts(numeric(100))
e5 <- rnorm(100)
y6 <- ts(numeric(100))
e6 <- rnorm(100)
for(i in 2:100){
y5[i] <- 0.1*e5[i-1] + e5[i]
y6[i] <- 0.9*e6[i-1] + e6[i]
}
sim <- tsibble(idx = seq_len(100), y4 = y4, y5 = y5, y6 = y6, index = idx)
plot1a <- sim %>% autoplot(y4) + labs( title = "theta = 0.6")
plot2a <- sim %>% autoplot(y5) + labs( title = "theta = 0.1")
plot3a <- sim %>% autoplot(y6) + labs( title = "theta = 0.9")
plot1a

plot2a

plot3a

#e.
set.seed(222222)
a7 <- ts(numeric(100))
e7 <- rnorm(100)
for(i in 2:100){
a7[i] <- 0.6*a7[i-1] + 0.6*e7[i-1] + e7[i]
}
sim7 <- tsibble(idx = seq_len(100), a7 = a7, index = idx)
plot7 <- sim7 %>% autoplot(a7) + labs( title = "ARMA(1,1) theta=.6, phi=.6")
plot7

#f.
set.seed(333333)
y8 <- ts(numeric(100))
e8 <- rnorm(100)
for(i in 3:100){
y8[i] <- -0.8*y8[i-1] + 0.3*y8[i-2] + e8[i]
}
sim8 <- tsibble(idx = seq_len(100), y8 = y8, index = idx)
plot8 <- sim8 %>% autoplot(y8) + labs( title = "AR(2) phi=-.8,.3")
plot8

#g.
plot7

plot8

#The ARMA(1,1) series (phi = 0.6, theta = 0.6) is stationary and fluctuates around zero,
#while the AR(2) series with phi1 = -0.8, phi2 = 0.3 violates the stationarity conditions
#(phi2 - phi1 > 1), so it oscillates with ever-increasing amplitude.

#7.Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
View(aus_airpassengers)
autoplot(aus_airpassengers)
## Plot variable not specified, automatically selected `.vars = Passengers`

#a.Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
fit<-aus_airpassengers %>%
model(ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
#ARIMA(0,2,1)
fit2<-aus_airpassengers %>%
model(arima021 =ARIMA(Passengers ~ pdq(0,2,1)))
fit2 %>%
select(arima021) %>%
gg_tsresiduals()

augment(fit2) %>% features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima021 6.70 0.754
fit2 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#b.Write the model in terms of the backshift operator.
#(1 - B)^2 y_t = (1 + theta1 B) e_t, with theta1 = -0.8963
#c.Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
fit3<-aus_airpassengers %>%
model(arima010 =ARIMA(Passengers ~ 1 + pdq(0,1,0)))
report(fit3)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
fit3 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#d.Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
fit4<-aus_airpassengers %>%
model(arima212 =ARIMA(Passengers ~ 1 + pdq(2,1,2)))
report(fit4)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
fit4 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#Without constant
fit5<-aus_airpassengers %>%
model(arima212n =ARIMA(Passengers ~ 0+pdq(2,1,2)))
## Warning: 1 error encountered for arima212n
## [1] non-stationary AR part from CSS
report(fit5)
## Series: Passengers
## Model: NULL model
## NULL model
fit5 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 row(s) containing missing values (geom_path).

#e.Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
fit6<-aus_airpassengers %>%
model(arima021 =ARIMA(Passengers ~ 1 +pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(fit6)
## Series: Passengers
## Model: ARIMA(0,2,1) w/ poly
##
## Coefficients:
## ma1 constant
## -1.0000 0.0571
## s.e. 0.0585 0.0213
##
## sigma^2 estimated as 3.855: log likelihood=-95.1
## AIC=196.21 AICc=196.79 BIC=201.63
fit6 %>% forecast(h=10) %>%
autoplot(aus_airpassengers)

#The forecasts still trend upward, but with two differences the constant induces a quadratic
#trend, so the ARIMA(0,2,1) with a constant gives the steepest increase (which is why R warns
#against including it).
#8.For the United States GDP series (from global_economy):
US<-global_economy %>%
filter(Country =="United States")
US %>%
autoplot(GDP) +
labs(y = "GDP", title = "United States GDP")

#a.if necessary, find a suitable Box-Cox transformation for the data;
lambda_U<-US%>%
features(GDP,features=guerrero)%>%
pull(lambda_guerrero)
lambda_U
## [1] 0.2819443
US<-US%>% mutate(GDP_U=GDP^lambda_U)
US%>%
autoplot(box_cox(GDP, lambda_U))+
labs(y="",
title=latex2exp::TeX(paste0(
"Transformed Sales with $$\\lambda_U$ =",
round(lambda_U,2))))

US%>%autoplot(GDP_U)

#b.fit a suitable ARIMA model to the transformed data using ARIMA();
fit_U<-
US %>%
model(ARIMA(GDP_U))
report(fit_U)
## Series: GDP_U
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.4586 33.3208
## s.e. 0.1198 2.6798
##
## sigma^2 estimated as 435.6: log likelihood=-253.16
## AIC=512.32 AICc=512.77 BIC=518.45
#c.try some other plausible models by experimenting with the orders chosen;
US%>%
features(GDP_U,unitroot_kpss)
## # A tibble: 1 × 3
## Country kpss_stat kpss_pvalue
## <fct> <dbl> <dbl>
## 1 United States 1.55 0.01
US%>%
features(GDP_U,unitroot_ndiffs)
## # A tibble: 1 × 2
## Country ndiffs
## <fct> <int>
## 1 United States 1
US%>%
features(GDP_U,unitroot_nsdiffs)
## # A tibble: 1 × 2
## Country nsdiffs
## <fct> <int>
## 1 United States 0
USFit<-US%>%
model(arima110= ARIMA(GDP_U ~pdq(1,1,0)),
arima120= ARIMA(GDP_U ~pdq(1,2,0)),
arima111= ARIMA(GDP_U ~pdq(1,1,1)),
stepwise = ARIMA(GDP_U),
serach = ARIMA(GDP_U, stepwise=FALSE))
USFit %>%
pivot_longer(!Country, names_to ="Model name", values_to ="Orders")
## # A mable: 5 x 3
## # Key: Country, Model name [5]
## Country `Model name` Orders
## <fct> <chr> <model>
## 1 United States arima110 <ARIMA(1,1,0) w/ drift>
## 2 United States arima120 <ARIMA(1,2,0)>
## 3 United States arima111 <ARIMA(1,1,1) w/ drift>
## 4 United States stepwise <ARIMA(1,1,0) w/ drift>
## 5 United States serach <ARIMA(1,1,0) w/ drift>
glance(USFit) %>%
arrange (AICc) %>%
select(.model:BIC)
## # A tibble: 5 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 436. -253. 512. 513. 518.
## 2 stepwise 436. -253. 512. 513. 518.
## 3 serach 436. -253. 512. 513. 518.
## 4 arima120 539. -255. 514. 514. 518.
## 5 arima111 444. -253. 514. 515. 522.
#ARIMA (1,1,0) has the lowest AICc
#d.choose what you think is the best model and check the residual diagnostics;
US%>%
gg_tsdisplay(difference(GDP_U), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

augment(USFit)%>%
filter(.model=='arima110')%>%
features(.innov, ljung_box, lag=10, dof=3)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States arima110 3.81 0.801
#e.produce forecasts of your fitted model. Do the forecasts look reasonable?
USFit %>%
forecast(h=5) %>%
filter(.model=='arima110') %>%
autoplot(US)

#f.compare the results with what you would obtain using ETS() (with no transformation).
us_ets<-US %>%
model(ETS(GDP))
fc<- us_ets %>%
forecast(h=10) %>%
autoplot(US)
fc

report(us_ets)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089
glance(USFit) %>%
arrange (AICc) %>%
select(.model:BIC)
## # A tibble: 5 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima110 436. -253. 512. 513. 518.
## 2 stepwise 436. -253. 512. 513. 518.
## 3 serach 436. -253. 512. 513. 518.
## 4 arima120 539. -255. 514. 514. 518.
## 5 arima111 444. -253. 514. 515. 522.
#Among the ARIMA candidates, ARIMA(1,1,0) with drift has the lowest AICc and BIC. The ETS
#AICc (3191) is not directly comparable because it is fitted to the untransformed GDP, so a
#fair comparison would use test-set or cross-validated accuracy.
#9.Consider aus_arrivals, the quarterly number of international visitors to Australia from several countries for the period 1981 Q1 – 2012 Q3.
View(aus_arrivals)
#a.Select one country and describe the time plot.
japan<-aus_arrivals%>%
filter(Origin=='Japan')
autoplot(japan,Arrivals)+
labs(title="Japan Arrivals")

#b.Use differencing to obtain stationary data.
japan %>%
mutate(diff_Arri=difference(Arrivals))%>%
features(diff_Arri,ljung_box,lag=10)
## # A tibble: 1 × 3
## Origin lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Japan 346. 0
#test stat = 346, p-value = 0: the first-differenced series is still strongly autocorrelated
#(a seasonal pattern remains), so a seasonal difference is needed as well.
#c.What can you learn from the ACF graph of the differenced data?
japan %>% ACF(Arrivals) %>%
autoplot() +labs(subtitle="Japan Arrivals")

#The ACF of the raw series decays very slowly, confirming the series is non-stationary; the
#ACF of the differenced series below still shows large spikes at the seasonal lags.
japan %>% ACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle="Changes in Japan Arrivals")

#d.What can you learn from the PACF graph of the differenced data?
japan %>% PACF(Arrivals) %>%
autoplot() +labs(subtitle="Japan Arrivals")

japan %>% PACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle="Changes in Japan Arrivals")

#e.What model do these graphs suggest?
#Several ACF/PACF spikes lie beyond the critical bounds, including at the seasonal lags,
#which suggests a seasonal ARIMA model with both an ordinary and a seasonal difference.
#f. Does ARIMA() give the same model that you chose? If not, which model do you think is better?
auto.arima(japan)
## Series: japan
## ARIMA(0,1,1)(1,1,1)[4]
##
## Coefficients:
## ma1 sar1 sma1
## -0.4228 -0.2305 -0.5267
## s.e. 0.0944 0.1337 0.1246
##
## sigma^2 estimated as 174801727: log likelihood=-1330.66
## AIC=2669.32 AICc=2669.66 BIC=2680.54
#g. Write the model in terms of the backshift operator, then without using the backshift operator.
#With the ARIMA(0,1,1)(1,1,1)[4] model selected above:
#(1 - Phi1 B^4)(1 - B)(1 - B^4) y_t = (1 + theta1 B)(1 + Theta1 B^4) e_t
#Expanding (without the backshift operator):
#y_t = y_{t-1} + (1 + Phi1) y_{t-4} - (1 + Phi1) y_{t-5} - Phi1 y_{t-8} + Phi1 y_{t-9}
#      + e_t + theta1 e_{t-1} + Theta1 e_{t-4} + theta1 Theta1 e_{t-5}
#10.Choose a series from us_employment, the total employment in different industries in the United States.
#a.Produce an STL decomposition of the data and describe the trend and seasonality.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
model(STL(Employed)) %>%
components() %>% autoplot()

#b.Do the data need transforming? If so, find a suitable transformation.
#Yes: the seasonal variation grows with the level of the series, so a log (or Box-Cox)
#transformation stabilises it.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
model(STL(log(Employed))) %>%
components() %>% autoplot()

#c.Are the data stationary? If not, find an appropriate differencing which yields stationary data.
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
autoplot(Employed)

us_employment %>% filter(Title == "Leisure and Hospitality") %>%
features(Employed, unitroot_kpss)
## # A tibble: 1 × 3
## Series_ID kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 CEU7000000001 11.9 0.01
#test stat=11.9, p-value=0.01, so the data are not stationary
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(diff_employed = difference(Employed)) %>%
features(diff_employed, unitroot_kpss)
## # A tibble: 1 × 3
## Series_ID kpss_stat kpss_pvalue
## <chr> <dbl> <dbl>
## 1 CEU7000000001 0.161 0.1
#test stat=0.161, p-value=0.1, so the differenced data are stationary
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
features(Employed, unitroot_ndiffs)
## # A tibble: 1 × 2
## Series_ID ndiffs
## <chr> <int>
## 1 CEU7000000001 1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(log_employed = log(Employed)) %>%
features(log_employed, unitroot_nsdiffs)
## # A tibble: 1 × 2
## Series_ID nsdiffs
## <chr> <int>
## 1 CEU7000000001 1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
mutate(log_employed = difference(log(Employed), 12)) %>%
features(log_employed, unitroot_ndiffs)
## # A tibble: 1 × 2
## Series_ID ndiffs
## <chr> <int>
## 1 CEU7000000001 1
#d.Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AICc values?
us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
gg_tsdisplay(difference(Employed), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

us_fit <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(arima310 = ARIMA(Employed ~ pdq(3,1,0)),
arima210 = ARIMA(Employed ~ pdq(2,1,0)),
stepwise = ARIMA(Employed),
auto = ARIMA(Employed, stepwise=FALSE))
report (us_fit)
## Warning in report.mdl_df(us_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 9
## Series_ID .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 CEU7000000001 arima310 1160. -4728. 9467. 9467. 9497. <cpl [3]> <cpl [24]>
## 2 CEU7000000001 arima210 1161. -4729. 9468. 9468. 9492. <cpl [2]> <cpl [24]>
## 3 CEU7000000001 stepwise 1104. -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
## 4 CEU7000000001 auto 1104. -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
#e.Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
fit2 <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(ARIMA(Employed)) %>%
report(fit2)
## Series: Employed
## Model: ARIMA(2,1,2)(0,1,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sma1 sma2
## 1.6621 -0.9333 -1.5105 0.7822 -0.4322 -0.1297
## s.e. 0.0327 0.0299 0.0585 0.0489 0.0342 0.0359
##
## sigma^2 estimated as 1104: log likelihood=-4704.55
## AIC=9423.1 AICc=9423.22 BIC=9457.14
fit2 %>% gg_tsresiduals()

augment(fit2) %>% features(.innov, ljung_box, lag=10, dof=2)
## # A tibble: 1 × 4
## Series_ID .model lb_stat lb_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 CEU7000000001 ARIMA(Employed) 21.0 0.00724
#p-value = 0.00724 < 0.05, so the residuals are not white noise: some autocorrelation
#remains, and a model with more terms could be tried.
#f.Forecast the next 3 years of data. Get the latest figures from https://fred.stlouisfed.org/categories/11 to check the accuracy of your forecasts.
ARIMA <- us_employment %>%
filter(Title == "Leisure and Hospitality") %>%
model(ARIMA(Employed))
ARIMA %>%
forecast(h = "3 years") %>%
autoplot(us_employment, level = NULL)

#g.Eventually, the prediction intervals are so wide that the forecasts are not particularly useful. How many years of forecasts do you think are sufficiently accurate to be usable?
PI <- ARIMA %>%
forecast(h = "3 years") %>%
mutate(PI = hilo(Employed, level = 95))
PI
## # A fable: 36 x 6 [1M]
## # Key: Series_ID, .model [1]
## Series_ID .model Month Employed .mean PI
## <chr> <chr> <mth> <dist> <dbl> <hilo>
## 1 CEU7000000001 ARIMA(E… 2019 Oct N(16717, 1104) 16717. [16652.22, 16782.49]95
## 2 CEU7000000001 ARIMA(E… 2019 Nov N(16519, 2569) 16519. [16419.55, 16618.24]95
## 3 CEU7000000001 ARIMA(E… 2019 Dec N(16536, 4302) 16536. [16407.56, 16664.67]95
## 4 CEU7000000001 ARIMA(E… 2020 Jan N(16206, 6108) 16206. [16053.25, 16359.61]95
## 5 CEU7000000001 ARIMA(E… 2020 Feb N(16347, 7774) 16347. [16173.82, 16519.45]95
## 6 CEU7000000001 ARIMA(E… 2020 Mar N(16606, 9159) 16606. [16418.33, 16793.48]95
## 7 CEU7000000001 ARIMA(E… 2020 Apr N(16928, 10234) 16928. [16729.77, 17126.31]95
## 8 CEU7000000001 ARIMA(E… 2020 May N(17309, 11062) 17309. [17103.16, 17515.45]95
## 9 CEU7000000001 ARIMA(E… 2020 Jun N(17746, 11753) 17746. [17533.32, 17958.29]95
## 10 CEU7000000001 ARIMA(E… 2020 Jul N(17815, 12422) 17815. [17596.34, 18033.23]95
## # … with 26 more rows
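#A sketch for judging how quickly the intervals widen: extend the horizon well past three
#years and plot the intervals.
ARIMA %>%
  forecast(h = "10 years") %>%
  autoplot(us_employment %>% filter(Title == "Leisure and Hospitality")) +
  labs(title = "Leisure and Hospitality: 10-year ARIMA forecast")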
#11.Choose one of the following seasonal time series: the Australian production of electricity, cement, or gas (from aus_production).
View(aus_production)
gas<-aus_production%>%
select(Quarter,Gas)
#a.Do the data need transforming? If so, find a suitable transformation.
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

#The series trends upward and the seasonal variation increases with the level, so a Box-Cox
#transformation is needed.
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))

#b.Are the data stationary? If not, find an appropriate differencing which yields stationary data.
aus_production %>%
gg_tsdisplay(box_cox(Gas, lambda) %>% difference(4), plot_type =
'partial')
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

aus_production %>%
gg_tsdisplay(box_cox(Gas, lambda) %>% difference(4) %>%
difference(1), plot_type = 'partial')
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

#c.Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
gas_fit <- aus_production %>%
select(Gas) %>%
model(arima011 = ARIMA(Gas ~ pdq(0,1,1)),
arima012 = ARIMA(Gas ~ pdq(0,1,2)),
stepwise = ARIMA(Gas),
auto = ARIMA(Gas, stepwise=FALSE))
report (gas_fit)
## Warning in report.mdl_df(gas_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima011 20.8 -625. 1257. 1257. 1271. <cpl [0]> <cpl [9]>
## 2 arima012 20.6 -624. 1256. 1256. 1269. <cpl [0]> <cpl [6]>
## 3 stepwise 18.7 -616. 1243. 1243. 1260. <cpl [0]> <cpl [3]>
## 4 auto 18.7 -616. 1243. 1243. 1260. <cpl [0]> <cpl [3]>
#The automatically selected model (stepwise/auto) has the lowest AICc (about 1243); of the
#two manually specified models, ARIMA(0,1,2) is slightly better than ARIMA(0,1,1).
#d.Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
gas_fit %>% select(auto) %>% augment() %>%
features(.resid, ljung_box, dof = 6, lag = 12)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 auto 11.0 0.0884
#e.Forecast the next 24 months of data using your preferred model.
gas_fit %>%
select(arima012) %>%
forecast(h = "2 years") %>%
autoplot(aus_production)

#f.Compare the forecasts obtained using ETS().
aus_production %>%
model(ETS(Gas)) %>%
forecast(h = "2 years") %>%
autoplot(aus_production) +
autolayer(gas_fit %>% select(arima012) %>% forecast(h = "2 years"))

#12.For the same time series you used in the previous exercise, try using a non-seasonal
#model applied to the seasonally adjusted data obtained from STL.
#Compare the forecasts with those obtained in the previous exercise.
#Which do you think is the best approach?
gas_ts<-ts(gas$Gas,start=c(1956,1),frequency=4)
gas_stl <- stl(gas_ts,s.window='periodic')
gas_stl %>% autoplot()
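#A sketch of the requested comparison using the fable tools from the previous exercise:
#an STL decomposition of the Box-Cox transformed gas data with a non-seasonal ARIMA fitted
#to the seasonally adjusted series, plotted against the seasonal ARIMA(0,1,2) forecasts.
aus_production %>%
  model(
    stl_arima = decomposition_model(
      STL(box_cox(Gas, lambda)),
      ARIMA(season_adjust ~ PDQ(0, 0, 0))  #non-seasonal model on the adjusted series
    )
  ) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_production) +
  autolayer(gas_fit %>% select(arima012) %>% forecast(h = "2 years")) +
  labs(title = "Gas: STL + non-seasonal ARIMA vs seasonal ARIMA(0,1,2)")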

#13.For the Australian tourism data (from tourism):
#a.Fit ARIMA models for each time series.
fit <- tourism %>%
model(
snaive = SNAIVE(Trips ~ lag("year")),
ets = ETS(Trips),
arima = ARIMA(Trips)
)
## Warning in sqrt(diag(best$var.coef)): NaNs produced
fit
## # A mable: 304 x 6
## # Key: Region, State, Purpose [304]
## Region State Purpose snaive ets arima
## <chr> <chr> <chr> <model> <model> <model>
## 1 Adelaide Sout… Busine… <SNAIVE> <ETS(M,N,M)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 2 Adelaide Sout… Holiday <SNAIVE> <ETS(A,N,A)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 3 Adelaide Sout… Other <SNAIVE> <ETS(M,A,N)> <ARIMA(0,1,1) w/ drift>
## 4 Adelaide Sout… Visiti… <SNAIVE> <ETS(A,N,A)> <ARIMA(0,0,0)(1,0,1)[4] w/ mean>
## 5 Adelaid… Sout… Busine… <SNAIVE> <ETS(A,N,N)> <ARIMA(0,0,0) w/ mean>
## 6 Adelaid… Sout… Holiday <SNAIVE> <ETS(A,A,N)> <ARIMA(0,1,1)>
## 7 Adelaid… Sout… Other <SNAIVE> <ETS(A,N,N)> <ARIMA(0,1,2)(0,0,2)[4]>
## 8 Adelaid… Sout… Visiti… <SNAIVE> <ETS(M,A,M)> <ARIMA(0,1,1)>
## 9 Alice S… Nort… Busine… <SNAIVE> <ETS(M,N,M)> <ARIMA(0,1,1)(0,0,1)[4]>
## 10 Alice S… Nort… Holiday <SNAIVE> <ETS(M,N,A)> <ARIMA(0,0,0)(0,1,2)[4]>
## # … with 294 more rows
#b.Produce forecasts of your fitted models.
fc <- forecast(fit)
fc
## # A fable: 7,296 x 7 [1Q]
## # Key: Region, State, Purpose, .model [912]
## Region State Purpose .model Quarter Trips .mean
## <chr> <chr> <chr> <chr> <qtr> <dist> <dbl>
## 1 Adelaide South Australia Business snaive 2018 Q1 N(129, 2018) 129.
## 2 Adelaide South Australia Business snaive 2018 Q2 N(174, 2018) 174.
## 3 Adelaide South Australia Business snaive 2018 Q3 N(185, 2018) 185.
## 4 Adelaide South Australia Business snaive 2018 Q4 N(197, 2018) 197.
## 5 Adelaide South Australia Business snaive 2019 Q1 N(129, 4036) 129.
## 6 Adelaide South Australia Business snaive 2019 Q2 N(174, 4036) 174.
## 7 Adelaide South Australia Business snaive 2019 Q3 N(185, 4036) 185.
## 8 Adelaide South Australia Business snaive 2019 Q4 N(197, 4036) 197.
## 9 Adelaide South Australia Business ets 2018 Q1 N(149, 1022) 149.
## 10 Adelaide South Australia Business ets 2018 Q2 N(173, 1406) 173.
## # … with 7,286 more rows
#c.Check the forecasts for the “Snowy Mountains” and “Melbourne” regions. Do they look reasonable?
fc %>%
filter(Region == "Snowy Mountains") %>%
autoplot(tourism)

fc %>%
filter(Region == "Melbourne") %>%
autoplot(tourism)

#14.For your retail time series (Exercise 5 above):
#a.develop an appropriate seasonal ARIMA model;
retail_ts<-ts(aus_retail$Turnover,start=c(1982,4),frequency=12)
retail_arima = forecast(arima(retail_ts))
#b.compare the forecasts with those you obtained in earlier chapters;
retail_hw = hw(retail_ts, seasonal = "multiplicative")
autoplot(retail_hw)

retail_hw_damped = hw(retail_ts, seasonal = "multiplicative", damped = TRUE)
autoplot(retail_hw_damped)

accuracy(retail_arima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -5.227e-13 318.0814 180.0452 -827.383 853.0138 8.89889 0.9852806
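#Note: arima() with no order argument fits a non-seasonal ARIMA(0,0,0) with a mean, and
#retail_ts above stacks every series in aus_retail, which is why the accuracy figures are so
#poor. A sketch of an appropriate seasonal ARIMA for the single series chosen in Exercise 5
#(myseries), using fable and the Box-Cox lambda computed there:
myseries %>%
  model(sarima = ARIMA(box_cox(Turnover, lambda3))) %>%
  forecast(h = "2 years") %>%
  autoplot(myseries) +
  labs(title = "Seasonal ARIMA forecast for the Exercise 5 retail series")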
#15.
#a.
autoplot(pelt, Hare)

#b.
auto.arima(pelt$Hare)
## Series: pelt$Hare
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.3811 -0.7120 -0.5747 45143.349
## s.e. 0.1016 0.0784 0.1240 3242.191
##
## sigma^2 estimated as 583278195: log likelihood=-1046.07
## AIC=2102.15 AICc=2102.85 BIC=2114.7
#c.
acf(pelt)

pacf(pelt)

#d.
#The coefficients given (phi1 = 0.82, phi2 = -0.29, phi3 = -0.01, phi4 = -0.22) define an
#AR(4) model: y_t = c + 0.82 y_{t-1} - 0.29 y_{t-2} - 0.01 y_{t-3} - 0.22 y_{t-4} + e_t.
#Forecasts for 1936, 1937 and 1938 come from plugging the last observed Hare values into
#this equation recursively, using each forecast in place of the still-unknown future values.
#e.
#Note that arima() with no order specified fits ARIMA(0,0,0) with a mean, so the forecasts
#below are flat at the sample mean; compare with the ARIMA(2,0,1) chosen by auto.arima() in
#part b.
pelt_ts <- ts(pelt$Hare)
forecast(arima(pelt_ts))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 92 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 93 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 94 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 95 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 96 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 97 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 98 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 99 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 100 45406.48 -953.6257 91766.59 -25495.19 116308.2
## 101 45406.48 -953.6257 91766.59 -25495.19 116308.2
#16.
Switzerland <- global_economy %>%
filter(Country == "Switzerland")
sw_ts<-ts(Switzerland$Population)
autoplot(sw_ts)

auto.arima(ts(sw_ts))
## Series: ts(sw_ts)
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.8765
## s.e. 0.0962
##
## sigma^2 estimated as 130226144: log likelihood=-602.65
## AIC=1209.3 AICc=1209.53 BIC=1213.35
ggtsdisplay(diff(sw_ts))

acf(sw_ts)

pacf(sw_ts)

swfc <- forecast(auto.arima(sw_ts))
autoplot(swfc)

#17.
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
#b.
autoplot(ibmclose) + ylab("Close Price") + xlab("Year")

ibmA <- auto.arima(ibmclose)
ibmA
## Series: ibmclose
## ARIMA(0,1,0)
##
## sigma^2 estimated as 52.62: log likelihood=-1251.37
## AIC=2504.74 AICc=2504.75 BIC=2508.64
#c.
checkresiduals(ibmA)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 14.151, df = 10, p-value = 0.1662
##
## Model df: 0. Total lags used: 10
#d.
#Note that arima() with no order specified fits ARIMA(0,0,0) (a mean model), so the forecasts
#below are flat at the sample mean rather than at the last observed close (~357), which is
#what the ARIMA(0,1,0) model ibmA fitted above would give.
forecast(arima(ibmclose))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 478.4688 370.6839 586.2538 313.626 643.3117
## 371 478.4688 370.6839 586.2538 313.626 643.3117
## 372 478.4688 370.6839 586.2538 313.626 643.3117
## 373 478.4688 370.6839 586.2538 313.626 643.3117
## 374 478.4688 370.6839 586.2538 313.626 643.3117
## 375 478.4688 370.6839 586.2538 313.626 643.3117
## 376 478.4688 370.6839 586.2538 313.626 643.3117
## 377 478.4688 370.6839 586.2538 313.626 643.3117
## 378 478.4688 370.6839 586.2538 313.626 643.3117
## 379 478.4688 370.6839 586.2538 313.626 643.3117
plot(forecast(arima(ibmclose)))

#e.
ibm2 <- ets(ibmclose, model="ZZZ", lambda="auto")
ibm2
## ETS(A,N,N)
##
## Call:
## ets(y = ibmclose, model = "ZZZ", lambda = "auto")
##
## Box-Cox transformation: lambda= 1.9999
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 109276.6822
##
## sigma: 3191.536
##
## AIC AICc BIC
## 8139.453 8139.518 8151.185
autoplot(ibm2)

#f.
checkresiduals(ibm2)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 16.112, df = 8, p-value = 0.04081
##
## Model df: 2. Total lags used: 10
#g.
ibm2F <- forecast.ets(ibm2)
ibm2F
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 345.3475 368.2831 339.0172 374.1185
## 371 356.9995 340.4051 372.8561 331.2843 380.9830
## 372 356.9995 336.5634 376.3275 325.2259 386.1678
## 373 356.9995 333.2903 379.2294 320.0292 390.4853
## 374 356.9995 330.3797 381.7678 315.3798 394.2500
## 375 356.9995 327.7260 384.0482 311.1167 397.6229
## 376 356.9995 325.2667 386.1334 307.1441 400.6995
## 377 356.9995 322.9607 388.0642 303.3998 403.5421
## 378 356.9995 320.7798 389.8690 299.8405 406.1938
## 379 356.9995 318.7033 391.5683 296.4346 408.6860
autoplot(ibm2F) + ylab("Stock Close") + xlab("Year")

#h.
#The ETS(A,N,N) model is preferred: its forecasts continue from the last observed price,
#whereas the default arima() forecasts revert to the long-run mean.