library("feasts")
## Loading required package: fabletools
library("seasonal")
library("tsibble")
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library("tsibbledata")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
library("forecast")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library("fable")
library("fpp3")
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✓ tibble    3.1.6     ✓ lubridate 1.8.0
## ✓ tidyr     1.2.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x lubridate::date()     masks base::date()
## x dplyr::filter()       masks stats::filter()
## x tsibble::intersect()  masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag()          masks stats::lag()
## x tsibble::setdiff()    masks base::setdiff()
## x tsibble::union()      masks base::union()
## x tibble::view()        masks seasonal::view()
library("psych")
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:seasonal':
## 
##     outlier
library("car")
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
library("glmnet")
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-4
library("ISLR")
library("leaps")
library("tseries")
library("weathermetrics")
library("seasonalview")
## 
## Attaching package: 'seasonalview'
## The following object is masked from 'package:tibble':
## 
##     view
## The following object is masked from 'package:seasonal':
## 
##     view
library("fma")
library("vars")
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
## 
##     index
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
## 
## Attaching package: 'vars'
## The following object is masked from 'package:fable':
## 
##     VAR
library("expsmooth")
library("seasonalview")
library("fma")
library("fabletools")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ readr   2.1.2     ✓ stringr 1.4.0
## ✓ purrr   0.3.4     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x psych::%+%()             masks ggplot2::%+%()
## x psych::alpha()           masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x stringr::boundary()      masks strucchange::boundary()
## x lubridate::date()        masks base::date()
## x Matrix::expand()         masks tidyr::expand()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks tsibble::intersect(), base::intersect()
## x lubridate::interval()    masks tsibble::interval()
## x dplyr::lag()             masks stats::lag()
## x Matrix::pack()           masks tidyr::pack()
## x car::recode()            masks dplyr::recode()
## x MASS::select()           masks dplyr::select()
## x lubridate::setdiff()     masks tsibble::setdiff(), base::setdiff()
## x purrr::some()            masks car::some()
## x lubridate::union()       masks tsibble::union(), base::union()
## x Matrix::unpack()         masks tidyr::unpack()
## x seasonalview::view()     masks tibble::view(), seasonal::view()
library("tibble")
library("lubridate")
library("tsibbledata")

# Import data

The data used here are the monthly U.S. cement production index (series IPN32731S) from January 1972 to January 2022. The industrial production (IP) index measures the real output of all relevant establishments located in the United States, regardless of their ownership, but excludes establishments located in U.S. territories.

# Read the monthly cement production index (series IPN32731S)
data <- read.csv("~/Desktop/us.cement.csv")

# Processing data

# Parse the DATE column and convert to a monthly tsibble indexed by Month
data <- data %>%
  mutate(DATE = as.Date(DATE), Month = yearmonth(DATE)) %>%
  as_tsibble(index = Month)

# 90% as train data and 10% as test data

# Rows 1:540 cover Jan 1972 - Dec 2016 (about 90%); rows 541:601 cover Jan 2017 - Jan 2022
train <- data[1:540, ]
test <- data[541:601, ]
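
Equivalently, because the index is a yearmonth, the same split can be written by date with tsibble's filter_index(). This is a minimal sketch, assuming rows 1:540 correspond to January 1972 through December 2016:

# Date-based version of the same split (produces the same train/test sets)
train <- data %>% filter_index(. ~ "2016 Dec")    # up to and including Dec 2016
test  <- data %>% filter_index("2017 Jan" ~ .)    # Jan 2017 onwards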

# Plotting the time series of the data

The following figure plots the time series. The series fluctuates around a fairly stable level with no pronounced long-run trend, so it looks close to stationary; a formal unit-root check is sketched after the plot.

data_ts <- ts(data$IPN32731S, start = c(1972, 1), end = c(2022, 1), frequency = 12)
data_ts %>%
  autoplot() +
  labs(title = "Manufacturing: Durable Goods: Cement", y = "Index 2017=100")
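
To back up the visual impression with a formal check, the unit-root features from feasts can be applied to the training data. This is a sketch; the resulting statistics are not reproduced here.

# KPSS statistic and the number of first differences suggested for stationarity
train %>%
  features(IPN32731S, list(unitroot_kpss, unitroot_ndiffs))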

In this case, I will fit three models, ARIMA, ETS and Neural Nets (NNAR), and compare their forecasting performance.

# ARIMA Model

The output below shows the model automatically selected and estimated by ARIMA(). We can see that R suggests an ARIMA(0,1,2)(0,0,2)[12] model.

fit_arima <- train %>% model(ARIMA(IPN32731S))
report(fit_arima)
## Series: IPN32731S 
## Model: ARIMA(0,1,2)(0,0,2)[12] 
## 
## Coefficients:
##           ma1      ma2    sma1     sma2
##       -0.0661  -0.1605  0.0520  -0.1497
## s.e.   0.0427   0.0434  0.0432   0.0443
## 
## sigma^2 estimated as 26.48:  log likelihood=-1646.15
## AIC=3302.29   AICc=3302.4   BIC=3323.74
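
If we want to pin this specification down rather than rely on automatic selection, the same model can be written out explicitly with the pdq() and PDQ() specials and compared on information criteria. This is only a sketch (fit_arima_cmp is a name introduced here, and its output is not shown):

fit_arima_cmp <- train %>%
  model(
    auto   = ARIMA(IPN32731S),
    manual = ARIMA(IPN32731S ~ pdq(0, 1, 2) + PDQ(0, 0, 2))
  )
glance(fit_arima_cmp)  # AICc, BIC, etc. for both specifications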

The ARIMA model captures essentially all of the dynamics: the residuals look like white noise apart from a single spike in the residual ACF at lag 13, and the Ljung-Box test below does not reject the white-noise null (p ≈ 0.22).

fit_arima %>% gg_tsresiduals(lag_max = 16)

augment(fit_arima) %>%
  features(.innov, ljung_box, lag = 16, dof = 6)
## # A tibble: 1 × 3
##   .model           lb_stat lb_pvalue
##   <chr>              <dbl>     <dbl>
## 1 ARIMA(IPN32731S)    13.0     0.223

The figure below shows the cement production index forecast for the next 5 years using the ARIMA(0,1,2)(0,0,2)[12] model. The forecasts stay close to the actual data, so the ARIMA model performs well judging from the graph.

fit_arima %>%
  forecast(h = "5 years") %>%
  autoplot(data) +
  labs(title = "ARIMA(0,1,2)(0,0,2) Prediction: Cement production in US",
       y = "Index")

# ETS Model

The output below shows the model selected and estimated by ETS(). We can see that R suggests an ETS(M,N,N) model, i.e. simple exponential smoothing with multiplicative errors and no trend or seasonal component.

fit_ets <- train %>% model(ETS(IPN32731S))
report(fit_ets)
## Series: IPN32731S 
## Model: ETS(M,N,N) 
##   Smoothing parameters:
##     alpha = 0.9156997 
## 
##   Initial states:
##      l[0]
##  143.9102
## 
##   sigma^2:  0.0018
## 
##      AIC     AICc      BIC 
## 5191.123 5191.167 5203.997
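
Because ETS(M,N,N) boils down to a single smoothed level, the fitted states can be inspected directly with components(). A minimal sketch (the plot is not shown here):

# Decompose the fitted ETS model into its level component and plot it
components(fit_ets) %>% autoplot()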

As the residual diagnostics below show, the ETS model captures most of the dynamics but performs somewhat worse than the ARIMA model: the residual ACF has significant spikes at lags 2 and 13, and the Ljung-Box test rejects the white-noise null (p ≈ 0.00003).

fit_ets %>%
  gg_tsresiduals(lag_max = 16)

augment(fit_ets) %>%
  features(.innov, ljung_box, lag = 16, dof = 6)
## # A tibble: 1 × 3
##   .model         lb_stat lb_pvalue
##   <chr>            <dbl>     <dbl>
## 1 ETS(IPN32731S)    38.5 0.0000312

The figure below shows the cement production index forecast for the next 5 years using the ETS(M,N,N) model. The ETS forecasts are also close to the actual data, but slightly further from the actual values than the ARIMA forecasts.

fc_ets <- forecast(fit_ets, h = "5 years")
autoplot(fc_ets, data) +
  labs(title = "ETS(M,N,N) Prediction: Cement production in US",
       y = "Index")

# Neural Nets Model

The output below shows the model selected and estimated by NNETAR(). We can see that R suggests an NNAR(25,1,13)[12] model: a neural network autoregression with 25 non-seasonal lags, 1 seasonal lag, and 13 nodes in the hidden layer.

fit_nn <- train %>% model(NNETAR(IPN32731S))
report(fit_nn)
## Series: IPN32731S 
## Model: NNAR(25,1,13)[12] 
## 
## Average of 20 networks, each of which is
## a 25-13-1 network with 352 weights
## options were - linear output units 
## 
## sigma^2 estimated as 2.662
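
The automatically selected order of 25 lags is large relative to the sample, so one natural variation is to fix a smaller lag order by hand through the AR() special of NNETAR(). This is only a sketch, not a tuned specification, and fit_nn_small is a name introduced here:

# A hand-specified, smaller NNAR: 12 non-seasonal lags and 1 seasonal lag
fit_nn_small <- train %>%
  model(NNETAR(IPN32731S ~ AR(p = 12, P = 1)))
report(fit_nn_small)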

The residual diagnostics below show that the neural network fits the training data very closely and captures all of the dynamics: the residuals again appear to be white noise.

fit_nn %>% gg_tsresiduals(lag_max = 16)
## Warning: Removed 25 row(s) containing missing values (geom_path).
## Warning: Removed 25 rows containing missing values (geom_point).
## Warning: Removed 25 rows containing non-finite values (stat_bin).

The figure below shows the cement production index forecast for the next 5 years using the NNAR(25,1,13) model. From the graph, the forecasts clearly deviate from the actual values; of the three models, this is the only one that can be judged the worst performer from the graph alone.

fc_nn <- forecast(fit_nn, h = "5 years")
autoplot(fc_nn, data) +
  labs(title = "NNAR(25,1,13) Prediction")

Having compared the three models' forecasts visually, we now evaluate their accuracy numerically. The output below reports training-set and test-set accuracy for the three competing models. Based on the test-set RMSE, MAE, MAPE and MASE, the ARIMA model is the most accurate, followed closely by ETS; the neural network has by far the best training fit but by far the worst test accuracy, a clear sign of overfitting.

bind_rows(
    fit_arima %>% accuracy(),
    fit_ets %>% accuracy(),
    fit_nn %>% accuracy(),
    fit_arima %>% forecast(h = "5 years") %>% accuracy(data),
    fit_ets %>% forecast(h = "5 years") %>% accuracy(data),
    fit_nn %>% forecast(h = "5 years") %>% accuracy(data)
  ) %>%
  dplyr::select(-ME, -MPE, -ACF1)
## # A tibble: 6 × 7
##   .model            .type     RMSE   MAE   MAPE  MASE RMSSE
##   <chr>             <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 ARIMA(IPN32731S)  Training  5.12  3.44  2.81  0.342 0.365
## 2 ETS(IPN32731S)    Training  5.26  3.47  2.82  0.345 0.375
## 3 NNETAR(IPN32731S) Training  1.63  1.13  0.953 0.113 0.116
## 4 ARIMA(IPN32731S)  Test      4.49  3.50  3.37  0.348 0.320
## 5 ETS(IPN32731S)    Test      4.76  3.85  3.72  0.383 0.339
## 6 NNETAR(IPN32731S) Test     16.1  15.2  14.9   1.51  1.14
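
As a final visual check, all three models can be fitted in a single call to model() and their forecasts overlaid on the test period. A sketch (fit_all is a name introduced here, and the plot is not shown):

fit_all <- train %>%
  model(
    arima = ARIMA(IPN32731S),
    ets   = ETS(IPN32731S),
    nn    = NNETAR(IPN32731S)
  )
fit_all %>%
  forecast(h = "5 years") %>%
  autoplot(test, level = NULL) +
  labs(title = "Test-set forecasts: ARIMA vs ETS vs NNAR", y = "Index")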