Chapter 11

Author

Valeria Sofia Vergara Cuello

Hands-On-Time-Series-Analysis-with-R

knitr::opts_chunk$set(echo = TRUE)
library(TSstudio)
Warning: package 'TSstudio' was built under R version 4.2.3
set.seed(12345)
stationary_ts <- arima.sim(model = list(order = c(1,0,0),ar = 0.5 ),n = 500)
ts_plot(stationary_ts, 
        title = "Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")
set.seed(12345)

non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)

ts_plot(non_stationary_ts, 
        title = "Non-Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")
data(AirPassengers)
AirPassengers
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1949 112 118 132 129 121 135 148 148 136 119 104 118
1950 115 126 141 135 125 149 170 170 158 133 114 140
1951 145 150 178 163 172 178 199 199 184 162 146 166
1952 171 180 193 181 183 218 230 242 209 191 172 194
1953 196 196 236 235 229 243 264 272 237 211 180 201
1954 204 188 235 227 234 264 302 293 259 229 203 229
1955 242 233 267 269 270 315 364 347 312 274 237 278
1956 284 277 317 313 318 374 413 405 355 306 271 306
1957 315 301 356 348 355 422 465 467 404 347 305 336
1958 340 318 362 348 363 435 491 505 404 359 310 337
1959 360 342 406 396 420 472 548 559 463 407 362 405
1960 417 391 419 461 472 535 622 606 508 461 390 432
data(AirPassengers)

ts_plot(AirPassengers,
        title = "Monthly Airline Passenger Numbers 1949-1960",
        Ytitle = "Thousands of Passengers",
        Xtitle = "Year")
data(AirPassengers)

ts_plot(AirPassengers,
        title = "Monthly Airline Passenger Numbers 1949-1960",
        Ytitle = "Thousands of Passengers",
        Xtitle = "Year")
ts_plot(diff(AirPassengers, lag = 1),
        title = "AirPassengers Series - First Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")
ts_plot(diff(diff(AirPassengers, lag = 1), 12),
        title = "AirPassengers Series - First and Seasonal Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")
ts_plot(diff(log(AirPassengers), lag = 1),
        title = "AirPassengers Series - First Differencing with Log Transformation",
        Xtitle = "Year",
        Ytitle = "Differencing/Log of Thousands of Passengers")
set.seed(12345)

library(plotly)
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
p1 <- plot_ly()
p2 <- plot_ly()

for(i in 1:20){
  rm <- NULL
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}
p1 %>% layout(title = "Simulate Random Walk",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()
p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()
set.seed(12345)

ar2 <- arima.sim(model = list(order = c(2,0,0),
                              ar = c(0.9, -0.3)),
                 n = 500) 
ts_plot(ar2, 
        title = "Simulate AR(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")
md_ar <- ar(ar2)
md_ar

Call:
ar(x = ar2)

Coefficients:
      1        2  
 0.8851  -0.2900  

Order selected 2  sigma^2 estimated as  1.049
par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)

set.seed(12345)

ma2 <- arima.sim(model = list(order = c(0, 0, 2),
                              ma = c(0.5, -0.3)),
                 n = 500) 
ts_plot(ma2, 
        title = "Simulate MA(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")
md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma

Call:
arima(x = ma2, order = c(0, 0, 2), method = "ML")

Coefficients:
        ma1      ma2  intercept
      0.530  -0.3454     0.0875
s.e.  0.041   0.0406     0.0525

sigma^2 estimated as 0.9829:  log likelihood = -705.81,  aic = 1419.62
par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)

set.seed(12345)

arma <- arima.sim(model = list(order(1,0,2), ar = c(0.7), ma = c(0.5,-0.3)), n = 500)
ts_plot(arma, 
        title = "Simulate ARMA(1,2) Series",
        Ytitle = "Value",
        Xtitle = "Index")
arma_md <- arima(arma, order = c(1,0,2))
arma_md

Call:
arima(x = arma, order = c(1, 0, 2))

Coefficients:
         ar1     ma1      ma2  intercept
      0.7439  0.4785  -0.3954     0.2853
s.e.  0.0657  0.0878   0.0849     0.1891

sigma^2 estimated as 1.01:  log likelihood = -713.18,  aic = 1436.36
par(mfrow=c(1,2))
acf(arma)
pacf(arma)

arima(arma, order = c(1, 0, 1), include.mean = FALSE)

Call:
arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)

Coefficients:
         ar1     ma1
      0.4144  0.8864
s.e.  0.0432  0.0248

sigma^2 estimated as 1.051:  log likelihood = -723,  aic = 1452
arima(arma, order = c(2, 0, 1), include.mean = FALSE)

Call:
arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)

Coefficients:
         ar1     ar2     ma1
      0.3136  0.1918  0.9227
s.e.  0.0486  0.0484  0.0183

sigma^2 estimated as 1.019:  log likelihood = -715.26,  aic = 1438.52
arima(arma, order = c(1, 0, 2), include.mean = FALSE)

Call:
arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)

Coefficients:
         ar1     ma1      ma2
      0.7602  0.4654  -0.4079
s.e.  0.0626  0.0858   0.0832

sigma^2 estimated as 1.014:  log likelihood = -714.27,  aic = 1436.54
arima(arma, order = c(2, 0, 2), include.mean = FALSE)

Call:
arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)

Coefficients:
         ar1     ar2     ma1      ma2
      0.7239  0.0194  0.4997  -0.3783
s.e.  0.2458  0.1257  0.2427   0.2134

sigma^2 estimated as 1.014:  log likelihood = -714.26,  aic = 1438.51
library(forecast)
Warning: package 'forecast' was built under R version 4.2.3
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)

checkresiduals(best_arma)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,2) with zero mean
Q* = 5.3129, df = 7, p-value = 0.6218

Model df: 3.   Total lags used: 10
ar_fc <- forecast(md_ar, h = 100)
plot_forecast(ar_fc, 
              title = "Forecast AR(2) Model",
              Ytitle = "Value",
              Xtitle = "Year")
data(Coffee_Prices)

robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))
ts_plot(robusta_price,
        title = "The Robusta Coffee Monthly Prices",
        Ytitle = "Price in USD",
        Xtitle = "Year") 
acf(robusta_price)

robusta_price_d1 <- diff(robusta_price)
par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)

robusta_md <- arima(robusta_price, order = c(1, 1, 0))
robusta_md

Call:
arima(x = robusta_price, order = c(1, 1, 0))

Coefficients:
         ar1
      0.2780
s.e.  0.0647

sigma^2 estimated as 0.007142:  log likelihood = 231.38,  aic = -458.76
summary(robusta_md)

Call:
arima(x = robusta_price, order = c(1, 1, 0))

Coefficients:
         ar1
      0.2780
s.e.  0.0647

sigma^2 estimated as 0.007142:  log likelihood = 231.38,  aic = -458.76

Training set error measures:
                      ME       RMSE        MAE        MPE     MAPE     MASE
Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
                    ACF1
Training set 0.001526295
checkresiduals(robusta_md)


    Ljung-Box test

data:  Residuals from ARIMA(1,1,0)
Q* = 26.896, df = 23, p-value = 0.2604

Model df: 1.   Total lags used: 24
data(USgas)
ts_plot(USgas,
        title = "US Monthly Natural Gas consumption", 
        Ytitle = "Billion Cubic Feet",
        Xtitle = "Year")
USgas_split <- ts_split(USgas, sample.out = 12)

train <- USgas_split$train
test <- USgas_split$test
par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)

USgas_d12 <- diff(train, 12)

ts_plot(USgas_d12,
        title = "US Monthly Natural Gas consumption - First Seasonal Difference", 
        Ytitle = "Billion Cubic Feet (First Difference)",
        Xtitle = "Year")
USgas_d12_1 <- diff(diff(USgas_d12, 1))

ts_plot(USgas_d12_1,
        title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing", 
        Ytitle = "Billion Cubic Feet (Difference)",
        Xtitle = "Year")
par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)

p <- q <- P <- Q <- 0:2
arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1
arima_grid$k <- rowSums(arima_grid)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
arima_grid <- arima_grid %>% filter(k <= 7)
arima_search <- lapply(1:nrow(arima_grid), function(i){
  md <- NULL
  # md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]), 
  #             seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])))
  # results <- data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
  #                       P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
  #                       AIC = md$aic)
}) 
# %>% bind_rows() %>% arrange(AIC)
arima_search <- data.frame(p=rep(0,81), d=rep(0,81), q=rep(0,81))
head(arima_search)
  p d q
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md

Call:
arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))

Coefficients:
         ar1      ma1    sar1     sar2     sma1
      0.4247  -0.9180  0.0132  -0.2639  -0.7449
s.e.  0.0770   0.0376  0.0894   0.0834   0.0753

sigma^2 estimated as 10160:  log likelihood = -1292.96,  aic = 2597.91
library(forecast)
USgas_test_fc <- forecast(USgas_best_md, h = 12)
accuracy(USgas_test_fc, test)
                    ME      RMSE      MAE       MPE     MAPE      MASE
Training set  6.081099  97.85701 73.36854 0.1298714 3.517097 0.6371821
Test set     42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
                     ACF1 Theil's U
Training set  0.004565602        NA
Test set     -0.049999868 0.3469228
test_forecast(USgas, 
              forecast.obj = USgas_test_fc,
              test = test)
final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
checkresiduals(final_md)


    Ljung-Box test

data:  Residuals from ARIMA(1,1,1)(2,1,1)[12]
Q* = 30.173, df = 19, p-value = 0.04964

Model df: 5.   Total lags used: 24
USgas_fc <- forecast(final_md, h = 12)
plot_forecast(USgas_fc,
              title = "US Natural Gas Consumption - Forecast",
              Ytitle = "Billion Cubic Feet",
              Xtitle = "Year")
USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1
Series: train 
ARIMA(2,1,1)(2,1,1)[12] 

Coefficients:
         ar1      ar2      ma1    sar1     sar2     sma1
      0.4301  -0.0372  -0.9098  0.0117  -0.2673  -0.7431
s.e.  0.0794   0.0741   0.0452  0.0887   0.0830   0.0751

sigma^2 = 10446:  log likelihood = -1292.83
AIC=2599.67   AICc=2600.22   BIC=2623.2
USgas_auto_md2 <- auto.arima(train, 
                             max.order = 5,
                             D = 1,
                             d = 1,
                             ic = "aic",
                             stepwise = FALSE, 
                             approximation = FALSE)
USgas_auto_md2
Series: train 
ARIMA(1,1,1)(2,1,1)[12] 

Coefficients:
         ar1      ma1    sar1     sar2     sma1
      0.4247  -0.9180  0.0132  -0.2639  -0.7449
s.e.  0.0770   0.0376  0.0894   0.0834   0.0753

sigma^2 = 10405:  log likelihood = -1292.96
AIC=2597.91   AICc=2598.32   BIC=2618.08
df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))

df$lag12 <- dplyr::lag(df$y, n = 12)

library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)

df$trend <- 1:nrow(df)
par <- ts_split(ts.obj = AirPassengers, sample.out = 12)

train <- par$train

test <- par$test
train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]
md1 <- tslm(train ~ season + trend + lag12, data = train_df)
checkresiduals(md1)


    Breusch-Godfrey test for serial correlation of order up to 24

data:  Residuals from Linear regression model
LM test = 83.147, df = 24, p-value = 1.903e-08
md2 <- auto.arima(train, 
                  xreg = cbind(model.matrix(~ month,train_df)[,-1], 
                               train_df$trend, 
                               train_df$lag12), 
                  seasonal = TRUE, 
                  stepwise = FALSE, 
                  approximation = FALSE)
 summary(md2)
Series: train 
Regression with ARIMA(2,0,0)(2,0,0)[12] errors 

Coefficients:
         ar1     ar2     sar1     sar2  monthfeb  monthmar  monthabr  monthmay
      0.5849  0.3056  -0.4421  -0.2063   -2.7523    0.8231    0.2066    2.8279
s.e.  0.0897  0.0903   0.1050   0.1060    1.8417    2.3248    2.4162    2.5560
      monthjun  monthjul  monthago  monthsept  monthoct  monthnov  monthdic
        6.6057   11.2337   12.1909     3.8269    0.6350   -2.2723   -0.9918
s.e.    3.2916    4.2324    4.1198     2.9243    2.3405    2.4211    1.9172
                    
      0.2726  1.0244
s.e.  0.1367  0.0426

sigma^2 = 72.82:  log likelihood = -426.93
AIC=889.86   AICc=896.63   BIC=940.04

Training set error measures:
                    ME     RMSE      MAE       MPE     MAPE      MASE
Training set 0.4117147 8.353629 6.397538 0.2571543 2.549682 0.2100998
                    ACF1
Training set 0.005966715
checkresiduals(md2)


    Ljung-Box test

data:  Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
Q* = 25.816, df = 20, p-value = 0.172

Model df: 4.   Total lags used: 24
fc1 <- forecast(md1, newdata = test_df)

fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1], 
                                  test_df$trend, 
                                  test_df$lag12))
x <- fc2

accuracy(fc1, test)
                        ME     RMSE      MAE        MPE     MAPE      MASE
Training set -1.658222e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
Test set      1.079611e+01 20.82782 18.57391  1.9530865 3.828115 0.5734654
                  ACF1 Theil's U
Training set 0.7502578        NA
Test set     0.1653119 0.4052175
accuracy(fc2, test)
                      ME      RMSE       MAE        MPE     MAPE      MASE
Training set   0.4117147  8.353629  6.397538  0.2571543 2.549682 0.2100998
Test set     -11.2123707 17.928195 13.353910 -2.4898843 2.924174 0.4385521
                     ACF1 Theil's U
Training set  0.005966715        NA
Test set     -0.325044930 0.3923795