library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ stringr 1.4.0
## ✓ tidyr   1.2.0     ✓ forcats 0.5.1
## ✓ readr   2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(fable)
## Loading required package: fabletools
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(feasts)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:tsibble':
## 
##     interval
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tsibbledata)
library(fpp3)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(fma)

##Chapter 8 Exercise

##Q1

#Q1a)
fit <- aus_livestock %>%
  filter(State == "Victoria",
         Animal == "Pigs") %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N"))) 

report(fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

The optimal value of α is 0.3221247 and the optimal value of ℓ0 is 100646.6.

Forecasts for the next four months:

#Q1a)
fc <- fit %>%
  forecast(h = 4)

fc %>%
  autoplot(aus_livestock) +
  ggtitle("Number of Pigs Slaughtered in Victoria")

fc
## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.
#Q1 b)
s <- residuals(fit)$.resid %>% sd()
hat_y <- fc$.mean[1]

fc %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [76854.79, 113518.3]95

R's 95% prediction interval is [76854.79, 113518.3]. The interval computed by hand from the residual standard deviation is slightly narrower than the one produced by R.
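
For reference, a minimal sketch of that hand-computed interval, using the residual standard deviation s and point forecast hat_y calculated above:

# approximate 95% interval: point forecast +/- 1.96 * residual sd
c(lower = hat_y - 1.96 * s, upper = hat_y + 1.96 * s)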

##Q2

Own SES function:

ses1 <- function(y, alpha, level)
  {
  yhat <- numeric(length(y)+1)
  yhat[1] <- level
  for(i in 2:(length(yhat))) {
    yhat[i] <- alpha * y[i-1] + (1 - alpha) * yhat[i-1]
    }
  return(last(yhat))
}

Forecast the next observation:

vic_pigs <- aus_livestock %>%
  filter(Animal == "Pigs", State == "Victoria") %>%
  pull(Count)
ses_fc <- vic_pigs %>%
  ses1(alpha = 0.3221, level = 100646.6) %>%
  round(0)
c(`Our own SES` = ses_fc, `fable::ETS()` = fc$Count[1])
## $`Our own SES`
## [1] 95187
## 
## $`fable::ETS()`
## N(95187, 8.7e+07)

Our own function gives the same forecast as ETS.

##Q3

The optim() function

ses2 <- function(par, y) {
  alpha <- par[1]
  level <- par[2]
  n <- length(y)
  yhat <- numeric(n)
  yhat[1] <- level
  for(i in 2:n) {
    yhat[i] <- alpha * y[i-1] + (1 - alpha) * yhat[i-1]
  }
  # return the sum of squared errors
  return(round(sum((y - yhat)^2), 3))
}

optim(c(0.00001, vic_pigs[1]), ses2, y=vic_pigs)$par
## [1] 3.236967e-01 9.422438e+04

The tidy() function

tidy(fit)
## # A tibble: 2 × 5
##   Animal State    .model                                          term  estimate
##   <fct>  <fct>    <chr>                                           <chr>    <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… alpha  3.22e-1
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… l[0]   1.01e+5

I didn’t get exactly the same values because optim() minimises the SSE from different starting values, whereas ETS() uses its own initialisation and estimation routine.

##Q4

SES <- function(init_pars, data){
  # init_pars is c(init_alpha, init_l0)
  # make next observation forecast variable
  fc_next <- 0
  
  # make SSE function to get SSE if parameters and data are given.
  SSE <- function(pars, data){
    error <- 0
    SSE <- 0
    alpha <- pars[1]
    l0 <- pars[2]
    y_hat <- l0
    
    for(index in 1:length(data)){
      error <- data[index] - y_hat
      SSE <- SSE + error^2
      
      y_hat <- alpha*data[index] + (1 - alpha)*y_hat 
    }
    # use superassignment to make forecast value possible to use outside SSE function.
    fc_next <<- y_hat
    return(SSE)
  }
  
  # use optim function to get optimal values of alpha and l0.
  optim_pars <- optim(par = init_pars, data = data, fn = SSE)
  
  # return results
  return(list(
    Next_observation_forecast = fc_next,
    alpha = optim_pars$par[1],
    l0 = optim_pars$par[2]
    ))
}

SES(c(0.5, vic_pigs[1]), vic_pigs)
## $Next_observation_forecast
## [1] 95186.55
## 
## $alpha
## [1] 0.3221274
## 
## $l0
## [1] 99223.08

##Q5

#Q5a)
global_economy %>%
  filter(Country == "China") %>%
  autoplot(Exports) +
  labs(y="% of GDP", title="Exports: China")

From the plot we can see that the series has an increasing trend without seasonality. Exports grow rapidly from the 1970s onwards and drop sharply after 2008 because of the financial recession.

#Q5b)
fit_chn <- global_economy %>%
  filter(Country == "China") %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 

fc_chn <- fit_chn %>%
  forecast(h = 5)

fc_chn %>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Exports: China", subtitle = "ETS(A,N,N)")

#Q5c)
schn <- accuracy(fit_chn)$RMSE
schn
## [1] 1.900172
#Q5d)
fit_chn_aan <- global_economy %>%
  filter(Country == "China") %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

fc_chn_aan <- fit_chn_aan %>%
  forecast(h = 5)

fc_chn_aan %>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Exports: China", subtitle = "ETS(A,A,N)")

#Q5d)
schn2 <- accuracy(fit_chn_aan)$RMSE
schn2
## [1] 1.906436

In this case, the ANN model captures the level and gives the most weight to the most recent observations, so its forecasts stay close to the current value. The AAN model also incorporates the historical trend, so it extrapolates the recent movement in the data.

#Q5e)

Compared with the ANN model, the AAN model gives a more pessimistic forecast in the plot. The RMSE is lower for ETS(A,N,N), so in this case I would say the ANN model forecasts better.

#Q5f)

The prediction interval of the ETS(A,N,N) method:

s_chn <- residuals(fit_chn)$.resid %>% sd()
hat_y_chn <- fc_chn$.mean[1]

fc_chn %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [15.96718, 23.54756]95
s_chn
## [1] 1.897838
hat_y_chn
## [1] 19.75737

95% prediction interval from R: [15.96718, 23.54756]. The 95% prediction interval using the RMSE is computed below.
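
A minimal sketch of that RMSE-based interval, using the residual standard deviation s_chn and point forecast hat_y_chn computed above:

# approximate 95% interval: point forecast +/- 1.96 * residual sd
c(lower = hat_y_chn - 1.96 * s_chn, upper = hat_y_chn + 1.96 * s_chn)

This works out to roughly [16.04, 23.48], slightly narrower than the interval produced by R.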

The prediction interval of the ETS(A,A,N) method:

s_chn_aan <- residuals(fit_chn_aan)$.resid %>% sd()
hat_y_chn_aan <- fc_chn_aan$.mean[1]

fc_chn_aan %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [15.4931, 23.23803]95
s_chn_aan
## [1] 1.921155
hat_y_chn_aan
## [1] 19.36556

95% prediction interval from R: [15.4931, 23.23803]. The RMSE-based interval is computed analogously from hat_y_chn_aan and s_chn_aan, giving roughly [15.60, 23.13], again slightly narrower than R's interval.

##Q6

lambda <- global_economy %>%
  filter(Country == "China") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

fit_ch <- global_economy %>%
  filter(Country == "China") %>%
  model(`Simple` = ETS(GDP ~ error("A") + trend("N") + season("N")),
        `Holt's method` = ETS(GDP ~ error("A") + trend("A") + season("N")),
        `Damped Holt's method` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
        `Box-Cox` = ETS(box_cox(GDP,lambda) ~ error("A") + trend("A") + season("N")),
        `Box-Cox Damped` = ETS(box_cox(GDP,lambda) ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
        `Log` = ETS(log(GDP) ~ error("A") + trend("A") + season("N")),
        `Log Damped` = ETS(log(GDP) ~ error("A") + trend("Ad", phi = 0.8) + season("N"))
        ) 

fc_ch <- fit_ch %>%
  forecast(h = 15)

fc_ch %>%
  autoplot(global_economy, level = NULL) +
  labs(title="GDP: China") +
  guides(colour = guide_legend(title = "Forecast"))

As discussed above, China’s GDP has an increasing trend but no seasonality. The log and Box-Cox transformations tend to over-forecast as h increases. I chose ϕ = 0.8 since 0.9 looks unrealistic. The damped log method and Holt’s method with ϕ = 0.8 have similar effects, and damped Holt’s method is a little more realistic than simple exponential smoothing in this case.

##Q7

fit_gas <- aus_production %>%
  model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
        multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
        `damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) 
aus_production %>%
   model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
        multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))) %>%
   forecast(h=20) %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Australian Gas Production",
       subtitle="Additive vs. Multiplicative Seasonality") +
  guides(colour = guide_legend(title = "Forecast"))

aus_production %>%
  model(multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
        `damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M"))
        ) %>%
  forecast(h=20) %>%
  autoplot(aus_production, level= NULL) +
  labs(title="Australian Gas Production",
       subtitle = "Additive vs. Damped Trend") +
  guides(colour = guide_legend(title = "Forecast"))

report(fit_gas)
## Warning in report.mdl_df(fit_gas): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 × 9
##   .model                  sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>                    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 additive              23.6       -927. 1872. 1873. 1903.  22.7  29.7 3.35  
## 2 multiplicative         0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
## 3 damped multiplicative  0.00340   -835. 1688. 1689. 1719.  21.0  32.4 0.0424
accuracy(fit_gas)
## # A tibble: 3 × 10
##   .model            .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE     ACF1
##   <chr>             <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 additive          Trai…  0.00525  4.76  3.35 -4.69  10.9  0.600 0.628  0.0772 
## 2 multiplicative    Trai… -0.115    4.60  3.02  0.199  4.08 0.542 0.606 -0.0131 
## 3 damped multiplic… Trai…  0.255    4.58  3.04  0.655  4.15 0.545 0.604 -0.00451

Multiplicative seasonality is necessary here because the size of the seasonal pattern is proportional to the level of the series. The multiplicative model has a smaller AICc than the damped multiplicative model, so by AICc we cannot say that damping improved the forecasts. However, in the RMSE comparison the damped multiplicative model has a slightly lower RMSE than the multiplicative model, which suggests damping does help a little on that measure.

##Q8

set.seed(12345678)
myseries <- aus_retail %>%
  filter(`Series ID` == "A3349337W")

myseries%>% autoplot(Turnover)

#a)

From the plot above, we can see that the multiplicative seasonality is necessary here because the change in seasonal patterns appears to be proportional to the level of the time series.

#b)

fit_myseries <- myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        `damped multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) 

fit_myseries %>%
  forecast(h=36) %>%
  autoplot(myseries, level = NULL) +
  labs(title="Australian Retail Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

From the plot, we can see that the damped multiplicative method produces a flatter upward trend than the historical one. The multiplicative method looks more in line with the historical trend, but may also run the risk of over-forecasting.

#c)

accuracy(fit_myseries) %>%
  select(.model, RMSE)
## # A tibble: 2 × 2
##   .model                 RMSE
##   <chr>                 <dbl>
## 1 multiplicative         15.0
## 2 damped multiplicative  14.6

The damped multiplicative method has a slightly lower RMSE here, but the difference is small, so I still prefer the multiplicative method.

#d)

myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("Multiplicative Method")

#Box-Pierce test, ℓ=2m for seasonal data, m=12
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  augment() %>% 
  features(.innov, box_pierce, lag = 24, dof = 0)
## # A tibble: 1 × 5
##   State           Industry                              .model bp_stat bp_pvalue
##   <chr>           <chr>                                 <chr>    <dbl>     <dbl>
## 1 New South Wales Hardware, building and garden suppli… multi…    71.5   1.30e-6
#Ljung-Box test
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  augment() %>% 
  features(.innov, ljung_box, lag = 24, dof = 0)
## # A tibble: 1 × 5
##   State           Industry                              .model lb_stat lb_pvalue
##   <chr>           <chr>                                 <chr>    <dbl>     <dbl>
## 1 New South Wales Hardware, building and garden suppli… multi…    73.2   7.20e-7

Although the residual plots do not look too bad, both the Box-Pierce and Ljung-Box p-values are well below 5%, so the residuals are distinguishable from white noise and some autocorrelation remains.

#e)

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

fit_train <- myseries_train %>%
  model(multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        snaive = SNAIVE(Turnover))

#producing forecasts
fc <- fit_train %>%
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries, level = NULL)

accuracy(fit_train) %>%
  select(.type, .model, RMSE)
## # A tibble: 2 × 3
##   .type    .model  RMSE
##   <chr>    <chr>  <dbl>
## 1 Training multi   13.5
## 2 Training snaive  26.3
fc %>% accuracy(myseries)  %>%
  select(.type, .model, RMSE)
## # A tibble: 2 × 3
##   .type .model  RMSE
##   <chr> <chr>  <dbl>
## 1 Test  multi   43.8
## 2 Test  snaive  98.6

The multiplicative method has better performance here since it has lower RMSE.

##Q9

lambda <- myseries_train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
ts_bc <- myseries_train %>%
  mutate(
    bc = box_cox(Turnover, lambda)
  )

# Model use box-cox transformation
fit <- ts_bc %>%
  model(
    'STL (BoxCox)' = STL(bc ~ season(window = "periodic"),
             robust = T),
    'ETS (BoxCox)' = ETS(bc)
     )

# Our best previous model is multiplicative method
fit_best <-ts_bc %>%
  model(
    "Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") + 
                                                    trend("A") +
                                                    season("M"))
                                                    )

rbind(accuracy(fit),accuracy(fit_best))
## # A tibble: 3 × 12
##   State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S… Hardwar… STL (… Trai…  0.320  12.9  8.67 -0.399  5.40 0.421 0.507 0.355
## 2 New S… Hardwar… ETS (… Trai… -1.74   13.2  9.37 -1.47   5.85 0.455 0.519 0.221
## 3 New S… Hardwar… Holt-… Trai… -1.14   13.5  9.55 -0.912  5.79 0.450 0.515 0.247

We can see that STL decomposition applied to the Box-Cox transformed series has the lowest RMSE and it performs better than our best previous forecast.

##Q10

#a)

aus_trips <- tourism %>%
  summarise(Trips = sum(Trips))

aus_trips %>%
  autoplot(Trips)

We can see that the data have seasonality and a strong increasing trend since the 2010s.

#b)

stl_dcmp <- aus_trips %>%
  model(STL(Trips)) %>% components()

stl_dcmp %>%
  as_tsibble() %>%
  autoplot(season_adjust)

#c)

aus_trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A") + 
                                            trend("Ad") + 
                                            season("N")
                            )
                        )
    ) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips)

#d)

aus_trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A") + 
                                            trend("A") + 
                                            season("N")
                            )
                        )
    ) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips)

#e)

aus_trips %>%
  model(ETS(Trips)) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips)

#f)

fit <- aus_trips %>%
  model(
    STL_AAdN = decomposition_model(STL(Trips),
                                   ETS(season_adjust ~ error("A") + 
                                                       trend("Ad") + 
                                                       season("N"))),
    STL_AAN = decomposition_model(STL(Trips),
                                  ETS(season_adjust ~ error("A") + 
                                                      trend("A") + 
                                                      season("N"))),
    ETS = ETS(Trips)
    )

accuracy(fit)
## # A tibble: 3 × 10
##   .model   .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 STL_AAdN Training 103.   763.  576. 0.367  2.72 0.607 0.629 -0.0174 
## 2 STL_AAN  Training  99.7  763.  574. 0.359  2.71 0.604 0.628 -0.0182 
## 3 ETS      Training 105.   794.  604. 0.379  2.86 0.636 0.653 -0.00151

We can see that the two STL decomposition models give the best in-sample fits, with STL_AAN having the lowest MAE and essentially the same RMSE as STL_AAdN.

#g)

fit %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips, level = NULL) +
  guides(colour=guide_legend(title="Forecast")) 

From the plot, we can see that the STL_AAN forecasts sit between the ETS and STL_AAdN forecasts. The three methods give similar trend forecasts and do not differ much. Personally, if only one had to be chosen, I think the STL_AAN method seems most reasonable.

#h)

best_model <- fit %>%
  select(STL_AAN) 
best_model %>%
  gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

augment(best_model) %>%
  features(.resid, ljung_box, lag = 24, dof = 5)
## # A tibble: 1 × 3
##   .model  lb_stat lb_pvalue
##   <chr>     <dbl>     <dbl>
## 1 STL_AAN    33.3    0.0221

Here I use the Ljung-Box test to check whether the residuals resemble white noise. Although the residual plots look roughly like white noise, this contradicts the low p-value. I think the contradiction comes from the large spike around lag 14 in the ACF.

##Q11

#a)

nz_visit <- aus_arrivals %>% filter(Origin == 'NZ')

autoplot(nz_visit, Arrivals) +
  ggtitle("Arrivals from New Zealand")

The data has an increasing trend with seasonality.

#b)

train <- nz_visit %>%
  slice(1:(nrow(nz_visit)-(4*2)))   
# 4*2 because the data is quarterly divided, then 2 years mean 2 times 4 quarters.

train %>%
  model(
    ETS(Arrivals ~ error("M") +
                   trend("A") +
                   season("M"))
  ) %>%
  forecast(h="2 years") %>%
  autoplot(level = NULL) + 
  autolayer(nz_visit, Arrivals) +
  labs(title = "New Zealand Visitors Train Forecast VS the Actual Data.")

#c)

Multiplicative seasonality is necessary here because the change in seasonal patterns appears to be proportional to the level of the time series.

#d)

compare <- anti_join(nz_visit, train,
                     by = c("Quarter", "Origin", "Arrivals"))

fit <- train %>%
  model(
    'ETS model' = ETS(Arrivals),
    'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
                                                          trend("A") +
                                                          season("A")),
    'Seasonal Naïve Method' = SNAIVE(Arrivals),
    'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)), 
                                                                             ETS(season_adjust))
  ) 

fc <- fit %>%
  forecast(h="2 years") 

fc %>%
  autoplot(level = NULL) + 
  autolayer(compare, Arrivals) +
  guides(colour=guide_legend(title="Methods")) +
  labs(title = "New Zealand Visitors Train Forecast VS the Actual Data.")

#e)

fc %>% accuracy(nz_visit)
## # A tibble: 4 × 11
##   .model     Origin .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Additive … NZ     Test   -7093. 17765. 12851. -2.17   4.20 0.864 0.918  0.0352
## 2 ETS model  NZ     Test   -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 3 Seasonal … NZ     Test    9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239 
## 4 STL decom… NZ     Test  -12535. 22723. 16172. -4.02   5.23 1.09  1.17   0.109

In this case, it looks like ETS method gives the best forecast since it has the lowest RMSE here.

best_model <- fit %>% select('ETS model')
best_model %>%
  gg_tsresiduals()

augment(best_model) %>%
  features(.resid, ljung_box)
## # A tibble: 1 × 3
##   .model    lb_stat lb_pvalue
##   <chr>       <dbl>     <dbl>
## 1 ETS model    1.29     0.256

Then I use the ACF plot and the Ljung-Box test on the residuals of the ETS model. All of the lags in the ACF plot are within the bounds and the Ljung-Box p-value is above 0.05, so the ETS model passes the residual checks.

#f)

nz_cv <- nz_visit %>%
  slice(1:(n() - 3)) %>%
  stretch_tsibble(.init = 36, .step = 3)

nz_cv %>% 
  model(
    'ETS model' = ETS(Arrivals),
    'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
                                                          trend("A") +
                                                          season("A")),
    'Seasonal Naïve Method' = SNAIVE(Arrivals),
    'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)), 
                                                                             ETS(season_adjust))
  ) %>%
  forecast(h = 3) %>%
  accuracy(nz_visit)
## # A tibble: 4 × 11
##   .model          Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive to a … NZ     Test  -375. 14347. 11021. 0.200  5.83 0.741 0.746 0.309
## 2 ETS model       NZ     Test  4627. 15327. 11799. 2.23   6.45 0.793 0.797 0.283
## 3 Seasonal Naïve… NZ     Test  8244. 18768. 14422. 3.83   7.76 0.970 0.976 0.566
## 4 STL decomposit… NZ     Test  4252. 15618. 11873. 2.04   6.25 0.798 0.812 0.244

Using time series cross-validation, the additive ETS model applied to the log-transformed data becomes the best forecasting method.

##Q12

#a)

cement_cv <- aus_production %>%
  slice(1:(n()-4)) %>%    # '-4' for a year forecast                         
  stretch_tsibble(.init = 5*4, .step = 1)                   


fc <- cement_cv %>%
  model(ETS(Cement), 
        SNAIVE(Cement)
        ) %>%
  forecast(h = "1 year")

#b)

fc %>%
  group_by(.id, .model) %>%
  mutate(h = row_number()) %>%
  ungroup() %>%
  accuracy(aus_production, by = c(".model", "h"))
## Error in accuracy.default(., aus_production, by = c(".model", "h")): First argument should be a forecast object or a time series.
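
The error most likely occurs because the group_by()/mutate() steps return a tsibble rather than a fable, so accuracy() falls through to the default method (supplied by the forecast package loaded above), which expects a forecast object. Converting back to a fable first should avoid it; a minimal sketch:

fc %>%
  group_by(.id, .model) %>%
  mutate(h = row_number()) %>%
  ungroup() %>%
  as_fable(response = "Cement", distribution = Cement) %>%  # restore the fable class
  accuracy(aus_production, by = c(".model", "h"))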

##Q13

Plot data of Beer and bricks production:

aus_production %>% autoplot(Beer) 

No Box-Cox transformation is needed for the Beer series because the variation does not change much with the level of the series.

fc <- aus_production %>%
  slice(1:(nrow(aus_production)-12)) %>%
  model(
    'ETS' = ETS(Beer), 
    'Seasonal Naïve' = SNAIVE(Beer),
    'STL Decomposition' = decomposition_model(STL(log(Beer)),
                                              ETS(season_adjust))
    ) %>%
  forecast(h = "3 years")

fc %>% 
  autoplot(filter_index(aus_production, "2000 Q1" ~ .),level=NULL) +
  guides(colour=guide_legend(title="Methods")) +
  ggtitle("Beer production from aus_production")

fc %>% accuracy(aus_production)
## # A tibble: 3 × 10
##   .model            .type     ME  RMSE   MAE      MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr>  <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS               Test   0.127  9.62  8.92  0.00998  2.13 0.563 0.489 0.376
## 2 Seasonal Naïve    Test  -2.92  10.8   9.75 -0.651    2.34 0.616 0.549 0.325
## 3 STL Decomposition Test  -2.85   9.87  8.95 -0.719    2.16 0.565 0.502 0.283

In the beer production case, we can see that ETS method has the best forecast because it has lowest RMSE.

tidy_bricks <- aus_production %>%
  filter(!is.na(Bricks))

tidy_bricks %>% autoplot(Bricks) + 
labs(title = "Australian Quarterly Brick Production")

No Box-Cox transformation is needed for the Bricks series either, as the variation does not change much with the level of the series.

fc <- tidy_bricks %>%
  slice(1:(nrow(tidy_bricks)-12)) %>%
  model(
    'ETS' = ETS(Bricks), 
    'Seasonal Naïve' = SNAIVE(Bricks),
    'STL Decomposition' = decomposition_model(STL(log(Bricks)),
                                              ETS(season_adjust))
    ) %>%
  forecast(h = "3 years")

fc %>% 
  autoplot(filter_index(tidy_bricks, "1980 Q1" ~ .),level=NULL) +
  guides(colour=guide_legend(title="Methods")) +
  ggtitle("Bricks production from Australia")

fc %>% accuracy(tidy_bricks)
## # A tibble: 3 × 10
##   .model            .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>             <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS               Test   2.27  17.5  13.2 0.474  3.31 0.365 0.354  0.339 
## 2 Seasonal Naïve    Test  32.6   36.5  32.6 7.85   7.85 0.898 0.739 -0.322 
## 3 STL Decomposition Test   9.71  18.7  14.9 2.29   3.65 0.411 0.378  0.0933

For the bricks series, the ETS model again gives the best forecasts since it has the lowest RMSE.

Cost of drug subsidies for diabetes (ATC2 == “A10”) and corticosteroids (ATC2 == “H02”) from PBS:

subsidies <- PBS %>%
  filter(ATC2 %in% c("A10", "H02")) %>%
  group_by(ATC2) %>%
  summarise(Cost = sum(Cost))

subsidies %>% 
  autoplot(Cost) +
  facet_grid(vars(ATC2), scales = "free_y") +
  ggtitle("Cost of drug subsidies for diabetes and corticosteroids")

We apply a Box-Cox transformation before the STL decomposition because the variation increases with the level here. We also handle the two series separately because each needs its own lambda.

# Diabetes
diabet <- subsidies %>%
  filter(ATC2 %in% "A10")

lambda1 <- diabet %>%
  features(Cost, features = guerrero) %>%
  pull(lambda_guerrero)

fc1 <- diabet %>%
  filter(Month < max(Month) - 35) %>%
  model(
    'ETS' = ETS(Cost), 
    'Seasonal Naïve' = SNAIVE(Cost), 
    'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda1)),
                                              ETS(season_adjust))
  ) %>%
  forecast(h = "3 years")

# Corticosteroids
corti <- subsidies %>%
  filter(ATC2 %in% "H02")

lambda2 <- corti %>%
  features(Cost, features = guerrero) %>%
  pull(lambda_guerrero)

fc2 <- corti %>%
  filter(Month < max(Month) - 35) %>%
  model(
    'ETS' = ETS(Cost), 
    'Seasonal Naïve' = SNAIVE(Cost), 
    'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda2)),
                                              ETS(season_adjust))
  ) %>%
  forecast(h = "3 years")

# Combine the data again
fc <- bind_rows(fc1,fc2)
fc %>% autoplot(subsidies, level=NULL) +
  guides(colour=guide_legend(title="Forecast")) +
  ggtitle("Three years forecasting for diabetes and corticosteroids")

fc %>% 
  accuracy(subsidies) %>%
  arrange(ATC2)
## # A tibble: 6 × 11
##   .model      ATC2  .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>       <chr> <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS         A10   Test   1.38e6 2.36e6 1.85e6  5.83   8.74 1.89  2.00   0.177 
## 2 Seasonal N… A10   Test   4.32e6 5.18e6 4.33e6 19.8   19.9  4.42  4.40   0.638 
## 3 STL Decomp… A10   Test   9.41e4 1.57e6 1.29e6 -0.269  6.63 1.32  1.33  -0.153 
## 4 ETS         H02   Test   2.70e4 7.65e4 6.45e4  1.99   7.05 1.09  1.07  -0.0990
## 5 Seasonal N… H02   Test  -1.48e4 8.55e4 7.16e4 -1.31   7.88 1.21  1.20   0.0226
## 6 STL Decomp… H02   Test   2.27e4 6.83e4 5.63e4  1.66   6.24 0.952 0.960 -0.219

STL decomposition is the best forecast method for A10 and ETS is the best forecast method for H02.

Total food retailing turnover for Australia from aus_retail:

food <- aus_retail %>%
  filter(Industry == "Food retailing") %>%
  summarise(Turnover = sum(Turnover))
autoplot(food, Turnover) +
  ggtitle('Total food retailing turnover for Australia')

We apply a Box-Cox transformation before the STL decomposition because the variation increases with the level here.

lambda <- food %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

fc <- food %>%
  filter(Month < max(Month) - 35) %>%
  model(
    'ETS' = ETS(Turnover), 
    'Seasonal Naïve' = SNAIVE(Turnover), 
    'STL Decomposition' = decomposition_model(STL(box_cox(log(Turnover), lambda)),
                                              ETS(season_adjust))
  ) %>%
  forecast(h = "3 years")

fc %>% 
  autoplot(filter_index(food, "2005 Jan"~ .), level=NULL)+
  guides(colour=guide_legend(title="Forecast")) +
  ggtitle("Three years forecasting for Australia's Total food retailing turnover")

fc %>% accuracy(food)
## # A tibble: 3 × 10
##   .model            .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>             <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS               Test  -151.    194.  170. -1.47   1.65 0.639 0.634 0.109
## 2 Seasonal Naïve    Test   625.    699.  625.  5.86   5.86 2.35  2.29  0.736
## 3 STL Decomposition Test    -8.34  155.  132. -0.135  1.24 0.496 0.506 0.256

We can see that STL Decomposition is the best forecast method for Australia’s Total food retailing turnover.

##Q14

#a)

Total number of trips across Australia:

aus_trips <- tourism %>%
  summarise(Trips = sum(Trips))

fit <- aus_trips %>%
  model(ETS(Trips))

report(fit)
## Series: Trips 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.4495675 
##     beta  = 0.04450178 
##     gamma = 0.0001000075 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]
##  21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
## 
##   sigma^2:  699901.4
## 
##      AIC     AICc      BIC 
## 1436.829 1439.400 1458.267
fit %>%
  forecast() %>%
  autoplot(aus_trips) +
  ggtitle("Forecasting of total number of trips across Australia")

The closing prices for the four stocks:

gafa_stock %>%
  autoplot(Close) +
  facet_grid(vars(Symbol), scales = "free_y")

gafa_regular <- gafa_stock %>%
  group_by(Symbol) %>%
  mutate(trading_day = row_number()) %>%
  ungroup() %>%
  as_tsibble(index = trading_day, regular = TRUE)

fit <- gafa_regular %>%
  model(
    ETS(Close)
  )

report(fit)
## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 4 × 10
##   Symbol .model       sigma2 log_lik    AIC   AICc    BIC    MSE   AMSE    MAE
##   <chr>  <chr>         <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 AAPL   ETS(Close) 0.000228  -5299. 10604. 10604. 10620.   4.39   8.96 0.0106
## 2 AMZN   ETS(Close) 0.000383  -7778. 15561. 15561. 15577. 359.   700.   0.0129
## 3 FB     ETS(Close) 0.000356  -5442. 10890. 10890. 10906.   5.82  11.3  0.0126
## 4 GOOG   ETS(Close) 0.000219  -7529. 15064. 15064. 15079. 144.   291.   0.0102
fit %>%
  forecast(h=50) %>%
  autoplot(gafa_regular %>% group_by_key() %>% slice((n() - 100):n()))
## `mutate_if()` ignored the following grouping variables:
## • Column `Symbol`

The lynx series:

pelt %>%
  model(
    ETS(Lynx)
    ) %>%
  report()
## Series: Lynx 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998998 
## 
##   Initial states:
##      l[0]
##  25737.12
## 
##   sigma^2:  162584145
## 
##      AIC     AICc      BIC 
## 2134.976 2135.252 2142.509
pelt %>%
  model(
    ETS(Lynx)
    ) %>%
  forecast(h=10) %>%
  autoplot(pelt) +
  ggtitle("Forecasting of PELT trading records.")

I think the ETS model gives good forecasts for the first two series but not for the third. The main reason is that the ETS model cannot handle the cyclic behaviour of the lynx series.

#b)

In the third series above, the ETS model does not work well because it cannot capture cyclic data.

##Q15

usdeaths.ets <- ets(usdeaths, model="MAM")
usdeaths.etsF <- forecast(usdeaths.ets, h = 1)
usdeaths.HWM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.etsF$mean
##           Jan
## 1979 8233.107
usdeaths.HWM$mean
##          Jan
## 1979 8217.64

##Q16

ibmclose.ets <- ets(ibmclose, model = "ANN")
ibmclose.ets.fc <- forecast(ibmclose.ets,h=1)
ibmclose.ets.fc
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 370       356.9995 347.6907 366.3083 342.7629 371.2361
#alpha is 0.9999, sigma is 7.2637
alpha<-0.9999
sigma<-7.2637
h<-2

ibmclose.ets.fc$mean[1]-((sigma)*(1+(alpha^2)*(h-1)))
## [1] 342.4736
ibmclose.ets.fc$lower[1,'95%']
##      95% 
## 342.7629
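
Note that at h = 1 the factor √(1 + α²(h − 1)) equals 1, so the usual 95% lower bound is just the point forecast minus 1.96σ; a minimal check against the forecast() output above:

ibmclose.ets.fc$mean[1] - 1.96 * sigma
# roughly 342.76, matching the Lo 95 value reported above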

##Q17

Point forecast: ŷT+h|T = ℓT. Forecast variance: σ2[1 + α2(h − 1)]. 95% interval: Upper = ℓT + 1.96σ√(1 + α2(h − 1)), Lower = ℓT − 1.96σ√(1 + α2(h − 1)).
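
As a rough check, the variances implied by this formula reproduce the forecast distributions printed in the Q1 fable above; a minimal sketch using the values reported there:

alpha <- 0.3221247
sigma2 <- 87480760
h <- 1:4
sigma2 * (1 + alpha^2 * (h - 1))  # roughly 8.7e+07, 9.7e+07, 1.1e+08, 1.1e+08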

##Chapter 9 Exercise

##Q1

#a)

Difference: the distance between the upper and lower bounds (the dashed critical values) differs between the figures, because each series has a different length.

All three figures still indicate white noise: in each case the autocorrelations stay within the critical bounds.

#b)

The critical values are at different distances from zero because they are calculated as ±1.96/√T, where T is the length of the series, so they shrink as T increases. The autocorrelations themselves also differ between the figures even though all series are white noise, because each series is a different random sample; with more observations the sample autocorrelations are closer to zero.
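
A quick sketch of these bounds, assuming the three series contain 36, 360 and 1000 observations as in the textbook figure:

# approximate 95% critical values +/- 1.96/sqrt(T)
1.96 / sqrt(c(36, 360, 1000))

This gives roughly 0.33, 0.10 and 0.06, matching the narrowing bands across the three plots.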

##Q2

gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  gg_tsdisplay(Close, plot_type='partial') +
  labs(title = "Amazon's Daily Closing Stock Price")
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.

gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  features(Close, unitroot_ndiffs) 
## # A tibble: 1 × 2
##   Symbol ndiffs
##   <chr>   <int>
## 1 AMZN        1

The plot of the daily closing price shows an increasing trend, so the series is non-stationary. The ACF decays very slowly and r1 is large and positive, whereas the ACF of a stationary series drops to zero quickly. The PACF has a first lag close to 1, again pointing to non-stationarity. Based on the KPSS-based unit root test, the series should be differenced once.

##Q3

Turkish GDP:

# plot
global_economy %>%
  filter(Country == "Turkey") %>%
  gg_tsdisplay(GDP, plot_type='partial') +
  labs(title = "Turkey's Non-transformed GDP")

# calculate lambda
lambda <- global_economy %>%
  filter(Country == "Turkey") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
# unit root test
global_economy %>%
  filter(Country == "Turkey") %>%
  features(box_cox(GDP,lambda), unitroot_ndiffs) 
## # A tibble: 1 × 2
##   Country ndiffs
##   <fct>    <int>
## 1 Turkey       1
# transformed plot
global_economy %>%
  filter(Country == "Turkey") %>%
  gg_tsdisplay(difference(box_cox(GDP,lambda)), plot_type='partial') +
  labs(title = latex2exp::TeX(paste0("Turkey's Differenced GDP with $\\lambda$ = ",
         round(lambda,2))))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

According to the plots, the original data are non-stationary and need to be differenced once based on the KPSS test.

Accommodation takings in the state of Tasmania:

# plot
aus_accommodation %>%
  filter(State == "Tasmania") %>%
  gg_tsdisplay(Takings, plot_type='partial') +
  labs(title = "Non-transformed Tasmania Accommodation Takings")

# calculate lambda
lambda <-aus_accommodation %>%
  filter(State == "Tasmania") %>%
  features(Takings, features = guerrero) %>%
  pull(lambda_guerrero)
#unit root test
aus_accommodation %>%
  filter(State == "Tasmania") %>%
  features(box_cox(Takings,lambda), unitroot_nsdiffs) 
## # A tibble: 1 × 2
##   State    nsdiffs
##   <chr>      <int>
## 1 Tasmania       1
# transformed plot
aus_accommodation %>%
  filter(State == "Tasmania") %>%
  gg_tsdisplay(difference(box_cox(Takings,lambda), 4), plot_type='partial') +
  labs(title = latex2exp::TeX(paste0("Differenced Tasmania Accommodation Takings with $\\lambda$ = ",
         round(lambda,2))))
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

The data have an increasing trend, which makes them non-stationary; they need one seasonal difference according to the unit root test.

Monthly sales:

# plot
souvenirs %>%
  gg_tsdisplay(Sales, plot_type='partial', lag = 36) +
  labs(title = "Non-transformed Monthly Souvenir Sales")

# calculate lambda
lambda <- souvenirs %>%
  features(Sales, features = guerrero) %>%
  pull(lambda_guerrero)
# unit root test
souvenirs %>%
  features(box_cox(Sales,lambda), unitroot_nsdiffs) 
## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1
# transformed plot
souvenirs %>%
  gg_tsdisplay(difference(box_cox(Sales,lambda), 12), plot_type='partial', lag = 36) +
  labs(title = latex2exp::TeX(paste0("Differenced Monthly Souvenir Sales with $\\lambda$ = ",
         round(lambda,2))))
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).

The data have strong seasonality, which makes them non-stationary; one seasonal difference is needed according to the unit root test. The lambda close to 0 suggests a natural log transformation. The plots look much more stationary after the seasonal difference.

##Q4

(1 − B^12) ln(yt), i.e. the log-transformed series after one seasonal difference at lag 12.
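
To see this notation in code: the seasonal difference (1 − B^12) of the logged series is simply the series minus its value twelve months earlier. A minimal sketch with the souvenirs data, using log() as an approximation to the Box-Cox transformation with λ close to 0 (d_fun and d_lag are just illustrative column names):

souvenirs %>%
  mutate(
    d_fun = difference(log(Sales), 12),       # (1 - B^12) ln(y_t) via difference()
    d_lag = log(Sales) - lag(log(Sales), 12)  # the same quantity written out with a lag
  )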

##Q5

set.seed(1234)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1)) 

# plot
myseries %>%
  gg_tsdisplay(Turnover, plot_type='partial', lag = 36) +
  labs(title = "Non-transformed Retail Turnover")

# lambda calculation
lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
# unit root test
myseries %>%
  features(box_cox(Turnover, lambda), unitroot_nsdiffs) 
## # A tibble: 1 × 3
##   State    Industry                                      nsdiffs
##   <chr>    <chr>                                           <int>
## 1 Tasmania Cafes, restaurants and takeaway food services       1
# transformed plot
myseries %>%
  gg_tsdisplay(difference(box_cox(Turnover,lambda), 12), plot_type='partial', lag = 36) +
  labs(title = latex2exp::TeX(paste0("Differenced Retail Turnover with $\\lambda$ = ",
         round(lambda,2))))
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).

The original data have an increasing trend, which makes them non-stationary, and need one seasonal difference based on the unit root test. A Box-Cox transformation was applied first. After transforming and differencing, the plots look much more stationary: the series is centred around zero and the ACF and PACF die out quickly.

##Q6

#a)

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)

#b)

sim %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" AR(1) model with $\\phi_i$ = 0.6"))

# phi_1 = 1
for(i in 2:100)
  y[i] <- 1*y[i-1] + e[i]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" AR(1) model with $\\phi_i$ = 1"))

# phi_1 = 0
for(i in 2:100)
  y[i] <- 0*y[i-1] + e[i]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" AR(1) model with $\\phi_i$ = 0"))

# phi_1 = -0.6
for(i in 2:100)
  y[i] <- -0.6*y[i-1] + e[i]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" AR(1) model with $\\phi_i$ = -0.6"))

From the plots above, we can see that changing ϕ1 produces quite different time series patterns. When ϕ1 = 1 the series looks like a random walk; when ϕ1 = 0 it looks like white noise; and when ϕ1 is negative the series tends to oscillate around the mean. As ϕ1 decreases, the series shows more frequent sign changes and sharper spikes.

#c)

for(i in 2:100)
  y[i] <- e[i] + 0.6 * e[i-1]

sim2 <- tsibble(idx = seq_len(100), y = y, index = idx)

#d)

sim2 %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" MA(1) model with $\\theta_i$ = 0.6"))

# theta_1 = 1
for(i in 2:100)
  y[i] <- e[i] + 1 * e[i-1]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" MA(1) model with $\\theta_1$ = 1"))

# theta_1 = 0
for(i in 2:100)
  y[i] <- e[i] + 0 * e[i-1]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" MA(1) model with $\\theta_1$ = 0"))

# theta_1 = -0.6
for(i in 2:100)
  y[i] <- e[i] + -0.6 * e[i-1]

tsibble(idx = seq_len(100), y = y, index = idx) %>%
  autoplot(y) +
  labs(title = latex2exp::TeX(" MA(1) model with $\\theta_1$ = -0.6"))

We can see that as θ1 decreases, the series shows more sign changes and stays more tightly concentrated around the mean. The overall shape of the plots does not change much because the series have similar magnitude.

#e)

set.seed(1234)
y <- numeric(100)

for(i in 2:100)
  y[i] <- e[i] + 0.6 * e[i-1] + 0.6 * y[i-1]

arma1_1 <- tsibble(idx = seq_len(100), y = y, index = idx)

#f)

set.seed(1234)
y <- numeric(100)

for(i in 3:100)
  y[i] <- -0.8 * y[i-1] + 0.3 * y[i-2] + e[i]

ar2 <- tsibble(idx = seq_len(100), y = y, index = idx)

#g)

arma1_1 %>%
  gg_tsdisplay(y, plot_type='partial') +
  labs(title = latex2exp::TeX("ARMA(1,1) model with $\\phi_1 = 0.6$, $\\theta_1 = 0.6$, and $\\sigma^2 = 1$"))

ar2 %>%
  gg_tsdisplay(y, plot_type='partial') +
  labs(title = latex2exp::TeX("AR(2) model with $\\phi_1 = -0.8$, $\\phi_2 = 0.3$, and $\\sigma^2 = 1$"))

The ARMA(1,1) series appears stationary: its ACF decays quickly. The AR(2) series is not stationary: it oscillates with growing amplitude, almost all of the ACF lags are outside the bounds, and the PACF shows a large negative first lag and little afterwards. Here the AR(2) stationarity condition ϕ2 − ϕ1 < 1 is not satisfied, since 0.3 − (−0.8) = 1.1.
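
For reference, the stationarity conditions for an AR(2) model are −1 < ϕ2 < 1, ϕ1 + ϕ2 < 1 and ϕ2 − ϕ1 < 1; equivalently, the roots of 1 − ϕ1z − ϕ2z² must all lie outside the unit circle. A minimal check for ϕ1 = −0.8, ϕ2 = 0.3:

phi1 <- -0.8
phi2 <- 0.3
c(abs(phi2) < 1, phi1 + phi2 < 1, phi2 - phi1 < 1)  # the last condition fails
abs(polyroot(c(1, -phi1, -phi2)))                   # one root has modulus below 1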

##Q7

#a)

fit <- aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(ARIMA(Passengers)) 

report(fit)
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8756
## s.e.   0.0722
## 
## sigma^2 estimated as 4.671:  log likelihood=-87.8
## AIC=179.61   AICc=179.93   BIC=182.99
fit %>% 
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Australian Aircraft Passengers with ARIMA(0,2,1)", y = "Passengers (in millions)")

fit %>% 
  gg_tsresiduals() +
  labs(title = "Residuals for ARIMA(0,2,1)")

In this case, ARIMA(0,2,1) was selected and the residuals do resemble white noise: all of the spikes in the ACF plot are within the bounds.

#b)

(1 − B)²yt = (1 − 0.87B)εt

That is, the twice-differenced series satisfies y″t = εt − 0.87εt−1.

#c)

fit2 <-aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(ARIMA(Passengers ~ pdq(0,1,0)))

fit2 %>% 
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Australian Aircraft Passengers with ARIMA(0,1,0)", y = "Passengers (in millions)")

fit2 %>% 
  gg_tsresiduals() +
  labs(title = "Residuals for ARIMA(0,1,0)")

From the plot, we can see that the predictions of the model in part c are significantly lower than those of the model in part a.

#d)

fit3 <-aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(ARIMA(Passengers ~ pdq(2,1,2)))

fit3 %>% 
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Australian Aircraft Passengers with ARIMA(2,1,2)", y = "Passengers (in millions)")

fit3 %>% 
  gg_tsresiduals() +
  labs(title = "Residuals for ARIMA(2,1,2)")

#removing constant
fit4 <-aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(ARIMA(Passengers ~ 0 + pdq(2,1,2)))
## Warning: 1 error encountered for ARIMA(Passengers ~ 0 + pdq(2, 1, 2))
## [1] non-stationary AR part from CSS
report(fit4)
## Series: Passengers 
## Model: NULL model 
## NULL model

The forecasts in part d are similar to those in part a, and the residuals again resemble white noise. Removing the constant results in a NULL model: the warning shows that the CSS estimation finds a non-stationary AR part, so no model is returned.

#e)

fit5 <-aus_airpassengers %>%
  filter(Year < 2012) %>%
  model(ARIMA(Passengers ~ 1 + pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
fit5 %>% 
  forecast(h=10) %>%
  autoplot(aus_airpassengers) +
  labs(title = "Australian Aircraft Passengers with ARIMA(0,2,1) with constant", y = "Passengers (in millions)")

fit5 %>% 
  gg_tsresiduals() +
  labs(title = "Residuals for ARIMA(0,2,1) with constant")

After including the constant, the slope of the forecasts becomes steeper (a constant with d = 2 induces a quadratic trend), and the warning recommends removing the constant or reducing the number of differences.

##Q8

#a)

# plot
global_economy %>%
  filter(Code == "USA") %>%
  gg_tsdisplay(GDP, plot_type='partial') +
  labs(title = "Non-transformed United States GDP")

# calculate lambda
lambda <-global_economy %>%
  filter(Code == "USA") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

From the plot, we can see that the data has an increasing trend which makes it non-stationary. So I applied a Box-Cox transformation to the data.

#b)

fit6 <- global_economy %>%
  filter(Code == "USA") %>%
  model(ARIMA(box_cox(GDP, lambda))) 

report(fit6)
## Series: GDP 
## Model: ARIMA(1,1,0) w/ drift 
## Transformation: box_cox(GDP, lambda) 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.1822
## s.e.  0.1198    9.5047
## 
## sigma^2 estimated as 5479:  log likelihood=-325.32
## AIC=656.65   AICc=657.1   BIC=662.78

ARIMA(1,1,0) with a drift is suitable for the transformed data.

#c)

# transformed plot
global_economy %>%
  filter(Code == "USA") %>%
  gg_tsdisplay(box_cox(GDP,lambda), plot_type='partial') +
  labs(title = "Transformed United States GDP")

# unit root test
global_economy %>%
  filter(Code == "USA") %>%
  features(box_cox(GDP,lambda), unitroot_ndiffs) 
## # A tibble: 1 × 2
##   Country       ndiffs
##   <fct>          <int>
## 1 United States      1
# modeling several 
usa_fit <- global_economy %>%
  filter(Code == "USA") %>%
  model(arima110 = ARIMA(box_cox(GDP,lambda) ~ pdq(1,1,0)),
        arima120 = ARIMA(box_cox(GDP,lambda) ~ pdq(1,2,0)),
        arima210 = ARIMA(box_cox(GDP,lambda) ~ pdq(2,1,0)),
        arima212 = ARIMA(box_cox(GDP,lambda) ~ pdq(2,1,2)),
        arima111 = ARIMA(box_cox(GDP,lambda) ~ pdq(1,1,1)))

glance(usa_fit) %>% arrange(AICc) %>% select(.model:BIC)
## # A tibble: 5 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 arima120  6780.   -326.  656.  656.  660.
## 2 arima110  5479.   -325.  657.  657.  663.
## 3 arima111  5580.   -325.  659.  659.  667.
## 4 arima210  5580.   -325.  659.  659.  667.
## 5 arima212  5734.   -325.  662.  664.  674.

The data have an increasing trend and should be differenced once based on the unit root test. The ARIMA(1,2,0) model appears to fit best since it has the lowest AICc.

#d)

usa_fit %>%
  select(arima120) %>%
  gg_tsresiduals() +
  ggtitle("Residuals for ARIMA(1,2,0)")

The residuals of the best model ARIMA(1,2,0) are white noise.

#e)

usa_fit %>%
  forecast(h=5) %>%
  filter(.model=='arima120') %>%
  autoplot(global_economy)

I think the forecasts look reasonable because the slope of the forecast trend matches the recent growth of the series.

#f)

fit_ets <- global_economy %>%
  filter(Code == "USA") %>%
  model(ETS(GDP))

report(fit_ets)
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9990876 
##     beta  = 0.5011949 
## 
##   Initial states:
##          l[0]        b[0]
##  448093333334 64917355687
## 
##   sigma^2:  7e-04
## 
##      AIC     AICc      BIC 
## 3190.787 3191.941 3201.089
fit_ets %>%
  forecast(h=5) %>%
  autoplot(global_economy)

The ETS() model has a much larger AICc, but AICc values are not directly comparable between ARIMA and ETS models, so this alone does not tell us which model is worse; a comparison on a hold-out set or by cross-validation is more appropriate.
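
One way to make such a comparison is to hold out the last five years (2013-2017) and compare test-set accuracy; a minimal sketch, reusing the lambda computed in part a (usa and usa_train are just helper names introduced here):

usa <- global_economy %>%
  filter(Code == "USA")
usa_train <- usa %>%
  filter(Year <= 2012)

usa_train %>%
  model(
    arima120 = ARIMA(box_cox(GDP, lambda) ~ pdq(1,2,0)),
    ets = ETS(GDP)
  ) %>%
  forecast(h = 5) %>%
  accuracy(usa) %>%
  select(.model, RMSE, MAE, MAPE)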

##Q10

Here we choose the monthly US employment series for leisure and hospitality jobs from us_employment.

#a)

us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  model(STL(Employed)) %>%
  components() %>% autoplot()

The data have an increasing trend, and the decomposition also shows a clear seasonal pattern.

#b)

Yes, the data need transforming: the seasonal variation grows with the level of the series, so a log transformation is applied below.

us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  model(STL(log(Employed))) %>%
  components() %>% autoplot()

#c)

us_employment %>% filter(Title == "Leisure and Hospitality")  %>%
  autoplot(Employed)

us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  features(Employed, unitroot_kpss)
## # A tibble: 1 × 3
##   Series_ID     kpss_stat kpss_pvalue
##   <chr>             <dbl>       <dbl>
## 1 CEU7000000001      11.9        0.01
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  mutate(diff_employed = difference(Employed)) %>%
  features(diff_employed, unitroot_kpss)
## # A tibble: 1 × 3
##   Series_ID     kpss_stat kpss_pvalue
##   <chr>             <dbl>       <dbl>
## 1 CEU7000000001     0.161         0.1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  features(Employed, unitroot_ndiffs)
## # A tibble: 1 × 2
##   Series_ID     ndiffs
##   <chr>          <int>
## 1 CEU7000000001      1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  mutate(log_employed = log(Employed)) %>%
  features(log_employed, unitroot_nsdiffs)
## # A tibble: 1 × 2
##   Series_ID     nsdiffs
##   <chr>           <int>
## 1 CEU7000000001       1
us_employment %>% filter(Title == "Leisure and Hospitality") %>%
  mutate(log_employed = difference(log(Employed), 12)) %>%
  features(log_employed, unitroot_ndiffs)
## # A tibble: 1 × 2
##   Series_ID     ndiffs
##   <chr>          <int>
## 1 CEU7000000001      1

The data are not stationary: there is an increasing trend, and the KPSS statistic (11.9) has a p-value of 0.01 (the smallest value reported), so stationarity is rejected. Based on the KPSS-based unit root test we need to difference once; repeating the test on the differenced series gives a p-value of 0.1 (the largest value reported), suggesting the differenced data are now stationary.

#d)

fit10d <- us_employment %>%
  filter(Title == "Leisure and Hospitality") %>%
  model(ARIMA(Employed)) %>%
  report(fit10d)
## Series: Employed 
## Model: ARIMA(2,1,2)(0,1,2)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     sma1     sma2
##       1.6621  -0.9333  -1.5105  0.7822  -0.4322  -0.1297
## s.e.  0.0327   0.0299   0.0585  0.0489   0.0342   0.0359
## 
## sigma^2 estimated as 1104:  log likelihood=-4704.55
## AIC=9423.1   AICc=9423.22   BIC=9457.14
us_employment %>%
  filter(Title == "Leisure and Hospitality")  %>%
  gg_tsdisplay(difference(Employed), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

R chooses ARIMA(2,1,2)(0,1,2)[12] for us, but from the ACF and PACF plots of the differenced series, ARIMA(3,1,0) and ARIMA(2,1,0) also look worth trying.

us_fit <- us_employment %>%
  filter(Title == "Leisure and Hospitality") %>%
  model(ARIMA310 = ARIMA(Employed ~ pdq(3,1,0)),
        ARIMA210 = ARIMA(Employed ~ pdq(2,1,0)),
        ARIMA212 = ARIMA(Employed ~ pdq(2,1,2)),
        ARIMA012 = ARIMA(Employed ~ pdq(0,1,2)),
        stepwise = ARIMA(Employed),
        auto = ARIMA(Employed, stepwise=FALSE))
report (us_fit)
## Warning in report.mdl_df(us_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 6 × 9
##   Series_ID     .model   sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots  
##   <chr>         <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>    
## 1 CEU7000000001 ARIMA310  1160.  -4728. 9467. 9467. 9497. <cpl [3]> <cpl [24]>
## 2 CEU7000000001 ARIMA210  1161.  -4729. 9468. 9468. 9492. <cpl [2]> <cpl [24]>
## 3 CEU7000000001 ARIMA212  1104.  -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
## 4 CEU7000000001 ARIMA012  1159.  -4728. 9466. 9466. 9490. <cpl [0]> <cpl [26]>
## 5 CEU7000000001 stepwise  1104.  -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>
## 6 CEU7000000001 auto      1104.  -4705. 9423. 9423. 9457. <cpl [2]> <cpl [26]>

We can see that the ARIMA(2,1,2)(0,1,2)[12] model, which is also what both the stepwise and exhaustive searches select, has the lowest AICc; the ARIMA(3,1,0) and ARIMA(2,1,0) candidates perform noticeably worse.

#e)

fit10e <- us_employment %>%
  filter(Title == "Leisure and Hospitality") %>%
  model(ARIMA(Employed)) %>%
  report(fit10e)
## Series: Employed 
## Model: ARIMA(2,1,2)(0,1,2)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     sma1     sma2
##       1.6621  -0.9333  -1.5105  0.7822  -0.4322  -0.1297
## s.e.  0.0327   0.0299   0.0585  0.0489   0.0342   0.0359
## 
## sigma^2 estimated as 1104:  log likelihood=-4704.55
## AIC=9423.1   AICc=9423.22   BIC=9457.14
fit10e %>% gg_tsresiduals()

augment(fit10e) %>% features(.innov, ljung_box, lag=10, dof=2)
## # A tibble: 1 × 4
##   Series_ID     .model          lb_stat lb_pvalue
##   <chr>         <chr>             <dbl>     <dbl>
## 1 CEU7000000001 ARIMA(Employed)    21.0   0.00724

Given the low Ljung-Box p-value here (about 0.007), we cannot conclude that the residuals are white noise; some autocorrelation remains, which is also visible in the ACF plot.

#f)

LHARIMA <- us_employment %>%
  filter(Title == "Leisure and Hospitality") %>%
  model(ARIMA(Employed))

LHARIMA %>%
  forecast(h = "3 years") %>%
  autoplot(us_employment, level = NULL)

We can see the actual values here: https://fred.stlouisfed.org/series/USLAH. The series drops sharply at the beginning of 2020 because of COVID-19, so our forecasts are not accurate over that period.

#g)

pi <- LHARIMA %>%
  forecast(h = "3 years") %>%
  mutate(pi = hilo(Employed, level = 95))
pi
## # A fable: 36 x 6 [1M]
## # Key:     Series_ID, .model [1]
##    Series_ID     .model      Month        Employed  .mean                     pi
##    <chr>         <chr>       <mth>          <dist>  <dbl>                 <hilo>
##  1 CEU7000000001 ARIMA(E… 2019 Oct  N(16717, 1104) 16717. [16652.22, 16782.49]95
##  2 CEU7000000001 ARIMA(E… 2019 Nov  N(16519, 2569) 16519. [16419.55, 16618.24]95
##  3 CEU7000000001 ARIMA(E… 2019 Dec  N(16536, 4302) 16536. [16407.56, 16664.67]95
##  4 CEU7000000001 ARIMA(E… 2020 Jan  N(16206, 6108) 16206. [16053.25, 16359.61]95
##  5 CEU7000000001 ARIMA(E… 2020 Feb  N(16347, 7774) 16347. [16173.82, 16519.45]95
##  6 CEU7000000001 ARIMA(E… 2020 Mar  N(16606, 9159) 16606. [16418.33, 16793.48]95
##  7 CEU7000000001 ARIMA(E… 2020 Apr N(16928, 10234) 16928. [16729.77, 17126.31]95
##  8 CEU7000000001 ARIMA(E… 2020 May N(17309, 11062) 17309. [17103.16, 17515.45]95
##  9 CEU7000000001 ARIMA(E… 2020 Jun N(17746, 11753) 17746. [17533.32, 17958.29]95
## 10 CEU7000000001 ARIMA(E… 2020 Jul N(17815, 12422) 17815. [17596.34, 18033.23]95
## # … with 26 more rows

Personally, I think we need to allow at least 5 years if we take COVID-19 into account; based on previous experience, it usually takes at least 5 years to get over a pandemic.

##Q11

#a)

aus_production %>%
 autoplot(Electricity)

Yes, it has an increasing trend with seasonality.

#b)

The data has an apparent increasing trend which makes it non-stationary.

#c)

lambda <- aus_production %>%
 features(Electricity, guerrero) %>%
 pull(lambda_guerrero)

aus_production %>%
  autoplot(box_cox(Electricity, lambda)) +
  labs(title = latex2exp::TeX(paste0("Box-Cox transformed AUS Electricity production with $\\lambda$ = ",
                                     round(lambda, 2))))

aus_production %>%
  gg_tsdisplay(box_cox(Electricity, lambda) %>% difference(4),
               plot_type = 'partial') +
  labs(title = latex2exp::TeX(paste0("Box-Cox transformed AUS Electricity with $\\lambda$ = ",
                                     round(lambda, 2))))
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

aus_production %>%
  gg_tsdisplay(box_cox(Electricity, lambda) %>% difference(4) %>% difference(1),
               plot_type = 'partial')
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

#e)

fit <- aus_production %>%
  model(
    manual = ARIMA(box_cox(Electricity, lambda) ~ 0 + pdq(1,1,0) + PDQ(0,1,2)),
    auto   = ARIMA(box_cox(Electricity, lambda))
  )
fit %>% select(auto)%>% report()
## Series: Electricity 
## Model: ARIMA(1,1,4)(0,1,1)[4] 
## Transformation: box_cox(Electricity, lambda) 
## 
## Coefficients:
##           ar1     ma1      ma2      ma3      ma4     sma1
##       -0.7039  0.2422  -0.4485  -0.1556  -0.2447  -0.5590
## s.e.   0.1731  0.1934   0.0972   0.0929   0.1180   0.1079
## 
## sigma^2 estimated as 22.96:  log likelihood=-634.86
## AIC=1283.72   AICc=1284.27   BIC=1307.25
glance(fit)
## # A tibble: 2 × 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 manual   25.2   -646. 1299. 1299. 1312. <cpl [1]> <cpl [8]>
## 2 auto     23.0   -635. 1284. 1284. 1307. <cpl [1]> <cpl [8]>
fit %>% select(auto) %>% augment() %>%
 features(.resid, ljung_box, dof = 6, lag = 12)
## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 auto      17.9   0.00636

#f)

aus_production %>%
 model(ETS(Electricity)) %>%
 forecast(h = "2 years") %>%
 autoplot(aus_production)

aus_production %>%
  model(ETS(Electricity)) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_production %>% filter(year(Quarter) >= 2000)) +
  autolayer(fit %>% select(auto) %>% forecast(h = "2 years"), colour = 2, alpha = 0.4)

fit %>% select(auto) %>% gg_tsresiduals()

##Q13

#a)

fit <- tourism %>%
  model(
    snaive = SNAIVE(Trips ~ lag("year")),
    ets = ETS(Trips),
    arima = ARIMA(Trips)
  )
## Warning in sqrt(diag(best$var.coef)): NaNs produced
fit %>%
  filter(Region == "Snowy Mountains", Purpose == "Holiday") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(1,0,0)(0,1,2)[4] 
## 
## Coefficients:
##          ar1     sma1     sma2
##       0.2162  -0.3711  -0.1897
## s.e.  0.1156   0.1280   0.1161
## 
## sigma^2 estimated as 592.9:  log likelihood=-349.58
## AIC=707.16   AICc=707.72   BIC=716.48

#b)

# note: forecasting the whole mable gives forecasts for every region and purpose
# at once, so fc1–fc8 below are identical; only the report() calls differ
fc1 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Snowy Mountains", Purpose == "Business") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,0,0) w/ mean 
## 
## Coefficients:
##       constant
##        16.2361
## s.e.    1.1097
## 
## sigma^2 estimated as 99.76:  log likelihood=-297.12
## AIC=598.24   AICc=598.4   BIC=603.01
fc2 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Snowy Mountains", Purpose == "Visiting") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,0,0) w/ mean 
## 
## Coefficients:
##       constant
##        26.3375
## s.e.    1.1800
## 
## sigma^2 estimated as 112.8:  log likelihood=-302.04
## AIC=608.07   AICc=608.23   BIC=612.84
fc3 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Snowy Mountains", Purpose == "Other") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,0,0) w/ mean 
## 
## Coefficients:
##       constant
##         6.3491
## s.e.    0.6273
## 
## sigma^2 estimated as 31.88:  log likelihood=-251.49
## AIC=506.99   AICc=507.14   BIC=511.75
fc4 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Melbourne", Purpose == "Holiday") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,1,1) w/ drift 
## 
## Coefficients:
##           ma1  constant
##       -0.9309    3.6493
## s.e.   0.0851    0.5709
## 
## sigma^2 estimated as 3069:  log likelihood=-429.24
## AIC=864.48   AICc=864.8   BIC=871.58
fc5 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Melbourne", Purpose == "Business") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,1,2)(1,0,1)[4] w/ drift 
## 
## Coefficients:
##           ma1      ma2    sar1     sma1  constant
##       -0.5551  -0.2335  0.9462  -0.7718    0.1930
## s.e.   0.1298   0.1293  0.0636   0.1450    0.2135
## 
## sigma^2 estimated as 3668:  log likelihood=-435.07
## AIC=882.14   AICc=883.3   BIC=896.35
fc6 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Melbourne", Purpose == "Visiting") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,1,1)(1,0,2)[4] 
## 
## Coefficients:
##           ma1    sar1     sma1    sma2
##       -0.8377  0.6586  -0.4021  0.3224
## s.e.   0.0652  0.1933   0.2062  0.1429
## 
## sigma^2 estimated as 4241:  log likelihood=-441.55
## AIC=893.1   AICc=893.92   BIC=904.94
fc7 <- fit %>%
  forecast(h = "2 years")
fit %>%
  filter(Region == "Melbourne", Purpose == "Other") %>%
  select(arima) %>%
  report()
## Series: Trips 
## Model: ARIMA(0,1,1) w/ drift 
## 
## Coefficients:
##           ma1  constant
##       -0.7495    1.2363
## s.e.   0.0708    0.6399
## 
## sigma^2 estimated as 489:  log likelihood=-356.09
## AIC=718.18   AICc=718.5   BIC=725.29
fc8 <- fit %>%
  forecast(h = "2 years")

##Q15

autoplot(pelt, Hare) +
  labs(title = "Trading records of Hare furs from 1845 to 1935",
       subtitle = "Hudson Bay Company",
       y = "Pelts Traded")

auto.arima(pelt$Hare)
## Series: pelt$Hare 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1       mean
##       1.3811  -0.7120  -0.5747  45143.349
## s.e.  0.1016   0.0784   0.1240   3242.191
## 
## sigma^2 = 583278195:  log likelihood = -1046.07
## AIC=2102.15   AICc=2102.85   BIC=2114.7
pelt1= auto.arima(pelt$Hare)
forecast(pelt1)
##     Point Forecast      Lo 80     Hi 80      Lo 95     Hi 95
##  92       6634.282 -24316.666  37585.23 -40701.108  53969.67
##  93      12878.399 -26881.588  52638.39 -47929.254  73686.05
##  94      27999.174 -13659.530  69657.88 -35712.316  91710.66
##  95      44436.817   2773.835  86099.80 -19281.216 108154.85
##  96      56373.412  13601.432  99145.39  -9040.686 121787.51
##  97      61156.103  16469.784 105842.42  -7185.724 129497.93
##  98      59263.137  13247.393 105278.88 -11111.871 129638.14
##  99      53243.727   6844.444  99643.01 -17717.853 124205.31
## 100      46278.080   -122.966  92679.13 -24686.196 117242.36
## 101      40943.427  -5603.322  87490.18 -30243.683 112130.54
# ACF and PACF of the Hare series only
acf(pelt$Hare)

pacf(pelt$Hare)

peltts <- ts(pelt$Hare)
# stats::arima() with no `order` argument fits ARIMA(0,0,0) with a mean,
# which is why the forecasts below are flat at the series mean
forecast(arima(peltts))
##     Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
##  92       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  93       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  94       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  95       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  96       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  97       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  98       45406.48 -953.6257 91766.59 -25495.19 116308.2
##  99       45406.48 -953.6257 91766.59 -25495.19 116308.2
## 100       45406.48 -953.6257 91766.59 -25495.19 116308.2
## 101       45406.48 -953.6257 91766.59 -25495.19 116308.2

##Q16

Switzerland <- global_economy %>% filter(Country == "Switzerland")
autoplot(ts(Switzerland$Population, start = 1960)) +
  labs(title = "The population of Switzerland from 1960 to 2017")

auto.arima(ts(Switzerland$Population))
## Series: ts(Switzerland$Population) 
## ARIMA(0,2,1) 
## 
## Coefficients:
##          ma1
##       0.8765
## s.e.  0.0962
## 
## sigma^2 = 130226144:  log likelihood = -602.65
## AIC=1209.3   AICc=1209.53   BIC=1213.35
ggtsdisplay(diff(ts(Switzerland$Population)))

# use the ARIMA(0,2,1) specification identified by auto.arima() above;
# forecast::Arima() with no `order` would default to a mean-only ARIMA(0,0,0)
fSwitzerland = forecast(Arima(ts(Switzerland$Population, start = 1960), order = c(0, 2, 1)))
autoplot(fSwitzerland)

##Q17

#a)

install.packages("Quandl", dependencies = TRUE)
## Error in contrib.url(repos, "source"): trying to use CRAN without setting a mirror
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
## 
##     index
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
#y <- as_tsibble(Quandl::Quandl("UMICH/SOC1", type="ts"), index = Date,) #doesn't work
y = Quandl("EIA/AEO_2016_REF_NO_CPP_PRCE_NA_COMM_NA_NG_NA_SATL_Y13DLRPMCF_A", api_key="otkvu4KBWBAykqK83Zxi", type="ts")

#b)

autoplot(y)

y %>% ndiffs()
## [1] 1
ggtsdisplay(diff(y))

y.ar = auto.arima(y)
summary(y.ar)
## Series: y 
## ARIMA(0,1,2) 
## 
## Coefficients:
##          ma1     ma2
##       0.8682  0.8879
## s.e.  0.1313  0.2520
## 
## sigma^2 = 0.04139:  log likelihood = 3.92
## AIC=-1.83   AICc=-0.74   BIC=1.94
## 
## Training set error measures:
##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set 0.0239043 0.1918064 0.129958 0.2266583 1.239587 0.7440255
##                     ACF1
## Training set -0.04993646
checkresiduals(y.ar)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2)
## Q* = 7.0725, df = 3, p-value = 0.06962
## 
## Model df: 2.   Total lags used: 5
# the ARIMA(0,1,2) residuals pass the Ljung-Box test at the 5% level (p = 0.07);
# try a simpler ARIMA(1,1,0) as an alternative
y1 = Arima(y, order = c(1, 1, 0))
checkresiduals(y1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 7.6448, df = 4, p-value = 0.1055
## 
## Model df: 1.   Total lags used: 5
fcy1 = forecast(y1, h = 4)
autoplot(fcy1)

yets = ets(y)
summary(yets)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = y) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 10.2023 
## 
##   sigma:  0.2964
## 
##      AIC     AICc      BIC 
## 27.24963 28.29311 31.13714 
## 
## Training set error measures:
##                      ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.05862132 0.285245 0.1682127 0.4917003 1.663627 0.9630386
##                   ACF1
## Training set 0.5005758
autoplot(yets)

checkresiduals(yets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 14.02, df = 3, p-value = 0.002878
## 
## Model df: 2.   Total lags used: 5
# note that the ETS(A,N,N) residuals fail the Ljung-Box test (p ≈ 0.003),
# unlike the ARIMA models above
fcyets = forecast(yets, h = 4)
autoplot(fcyets)