# Chapter 11 Code

set.seed(12345)

stationary_ts <- arima.sim(model = list(order = c(1,0,0),
                                        ar = 0.5 ),
                           n = 500)

——– Code Chank 2 ——–

library(TSstudio)

ts_plot(stationary_ts, 
        title = "Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 3 ——–

set.seed(12345)

non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)

ts_plot(non_stationary_ts, 
        title = "Non-Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 4 ——–

data(AirPassengers)

ts_plot(AirPassengers,
        title = "Monthly Airline Passenger Numbers 1949-1960",
        Ytitle = "Thousands of Passengers",
        Xtitle = "Year")

——– Code Chank 5 ——–

ts_plot(diff(AirPassengers, lag = 1),
        title = "AirPassengers Series - First Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")

——– Code Chank 6 ——–

ts_plot(diff(diff(AirPassengers, lag = 1), 12),
        title = "AirPassengers Series - First and Seasonal Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")

——– Code Chank 7 ——–

ts_plot(diff(log(AirPassengers), lag = 1),
        title = "AirPassengers Series - First Differencing with Log Transformation",
        Xtitle = "Year",
        Ytitle = "Differencing/Log of Thousands of Passengers")

——– Code Chank 8 ——–

set.seed(12345)

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
p1 <- plot_ly()
p2 <- plot_ly()

for(i in 1:20){
  rm <- NULL
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}

——– Code Chank 8 ——–

set.seed(12345)

library(plotly)

p1 <- plot_ly()
p2 <- plot_ly()

for(i in 1:20){
  rm <- NULL
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}

——– Code Chank 9 ——–

p1 %>% layout(title = "Simulate Random Walk",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()

——– Code Chank 10 ——–

p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()

——– Code Chank 11 ——–

set.seed(12345)

ar2 <- arima.sim(model = list(order = c(2,0,0),
                              ar = c(0.9, -0.3)),
                 n = 500) 

——– Code Chank 12 ——–

ts_plot(ar2, 
        title = "Simulate AR(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 13 ——–

md_ar <- ar(ar2)
md_ar
## 
## Call:
## ar(x = ar2)
## 
## Coefficients:
##       1        2  
##  0.8851  -0.2900  
## 
## Order selected 2  sigma^2 estimated as  1.049

——– Code Chank 14 ——–

par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)

——– Code Chank 15 ——–

set.seed(12345)

ma2 <- arima.sim(model = list(order = c(0, 0, 2),
                              ma = c(0.5, -0.3)),
                 n = 500) 

——– Code Chank 16 ——–

ts_plot(ma2, 
        title = "Simulate MA(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 17 ——–

md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma
## 
## Call:
## arima(x = ma2, order = c(0, 0, 2), method = "ML")
## 
## Coefficients:
##         ma1      ma2  intercept
##       0.530  -0.3454     0.0875
## s.e.  0.041   0.0406     0.0525
## 
## sigma^2 estimated as 0.9829:  log likelihood = -705.81,  aic = 1419.62

——– Code Chank 18 ——–

par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)

# ——– Code Chank 19 ——–

set.seed(12345)

arma <- arima.sim(model = list(order(1,0,2), ar = c(0.7), ma = c(0.5,-0.3)), n = 500)

——– Code Chank 20 ——–

ts_plot(arma, 
        title = "Simulate ARMA(1,2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 21 ——–

arma_md <- arima(arma, order = c(1,0,2))
arma_md
## 
## Call:
## arima(x = arma, order = c(1, 0, 2))
## 
## Coefficients:
##          ar1     ma1      ma2  intercept
##       0.7439  0.4785  -0.3954     0.2853
## s.e.  0.0657  0.0878   0.0849     0.1891
## 
## sigma^2 estimated as 1.01:  log likelihood = -713.18,  aic = 1436.36

——– Code Chank 22 ——–

par(mfrow=c(1,2))
acf(arma)
pacf(arma)

# ——– Code Chank 23 ——–

arima(arma, order = c(1, 0, 1), include.mean = FALSE)
## 
## Call:
## arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ma1
##       0.4144  0.8864
## s.e.  0.0432  0.0248
## 
## sigma^2 estimated as 1.051:  log likelihood = -723,  aic = 1452

——– Code Chank 24 ——–

arima(arma, order = c(2, 0, 1), include.mean = FALSE)
## 
## Call:
## arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ar2     ma1
##       0.3136  0.1918  0.9227
## s.e.  0.0486  0.0484  0.0183
## 
## sigma^2 estimated as 1.019:  log likelihood = -715.26,  aic = 1438.52

——– Code Chank 25 ——–

arima(arma, order = c(1, 0, 2), include.mean = FALSE)
## 
## Call:
## arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ma1      ma2
##       0.7602  0.4654  -0.4079
## s.e.  0.0626  0.0858   0.0832
## 
## sigma^2 estimated as 1.014:  log likelihood = -714.27,  aic = 1436.54

——– Code Chank 26 ——–

arima(arma, order = c(2, 0, 2), include.mean = FALSE)
## 
## Call:
## arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ar2     ma1      ma2
##       0.7239  0.0194  0.4997  -0.3783
## s.e.  0.2458  0.1257  0.2427   0.2134
## 
## sigma^2 estimated as 1.014:  log likelihood = -714.26,  aic = 1438.51

——– Code Chank 27 ——–

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)

checkresiduals(best_arma)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,2) with zero mean
## Q* = 5.3129, df = 7, p-value = 0.6218
## 
## Model df: 3.   Total lags used: 10

——– Code Chank 28 ——–

ar_fc <- forecast(md_ar, h = 100)

——– Code Chank 29 ——–

plot_forecast(ar_fc, 
              title = "Forecast AR(2) Model",
              Ytitle = "Value",
              Xtitle = "Year")

——– Code Chank 30 ——–

data(Coffee_Prices)

robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))

——– Code Chank 31 ——–

ts_plot(robusta_price,
        title = "The Robusta Coffee Monthly Prices",
        Ytitle = "Price in USD",
        Xtitle = "Year") 

——– Code Chank 32 ——–

acf(robusta_price)

# ——– Code Chank 33 ——–

robusta_price_d1 <- diff(robusta_price)

——– Code Chank 34 ——–

par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)

# ——– Code Chank 35 ——–

robusta_md <- arima(robusta_price, order = c(1, 1, 0))

——– Code Chank 36 ——–

summary(robusta_md)
## 
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
## 
## Coefficients:
##          ar1
##       0.2780
## s.e.  0.0647
## 
## sigma^2 estimated as 0.007142:  log likelihood = 231.38,  aic = -458.76
## 
## Training set error measures:
##                       ME       RMSE        MAE        MPE     MAPE     MASE
## Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
##                     ACF1
## Training set 0.001526295

——– Code Chank 37 ——–

checkresiduals(robusta_md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 26.896, df = 23, p-value = 0.2604
## 
## Model df: 1.   Total lags used: 24

——– Code Chank 38 ——–

data(USgas)

——– Code Chank 39 ——–

ts_plot(USgas,
        title = "US Monthly Natural Gas consumption", 
        Ytitle = "Billion Cubic Feet",
        Xtitle = "Year")

——– Code Chank 40 ——–

USgas_split <- ts_split(USgas, sample.out = 12)

train <- USgas_split$train
test <- USgas_split$test

——– Code Chank 41 ——–

par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)

# ——– Code Chank 42 ——–

USgas_d12 <- diff(train, 12)

ts_plot(USgas_d12,
        title = "US Monthly Natural Gas consumption - First Seasonal Difference", 
        Ytitle = "Billion Cubic Feet (First Difference)",
        Xtitle = "Year")

——– Code Chank 43 ——–

USgas_d12_1 <- diff(diff(USgas_d12, 1))

ts_plot(USgas_d12_1,
        title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing", 
        Ytitle = "Billion Cubic Feet (Difference)",
        Xtitle = "Year")

——– Code Chank 44 ——–

par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)

# ——– Code Chank 45 ——–

p <- q <- P <- Q <- 0:2

——– Code Chank 46 ——–

arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1

——– Code Chank 47 ——–

arima_grid$k <- rowSums(arima_grid)

——– Code Chank 48 ——–

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
arima_grid <- arima_grid %>% filter(k <= 7)

——– Code Chank 49 ——–

library(dplyr)

arima_grid <- arima_grid %>% filter(k <= 7)
# -------- Code Chank 49 --------
arima_search <- lapply(1:nrow(arima_grid), function(i){
  md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]),
              seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])),
              method = "CSS")  # Cambiar el método de optimización si es necesario
  results <- data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
                        P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
                        AIC = md$aic)
}) %>% 
  bind_rows() %>% 
  arrange(AIC)

——– Code Chank 50 ——–

head(arima_search)
##   p d q P D Q AIC
## 1 0 1 0 0 1 0  NA
## 2 1 1 0 0 1 0  NA
## 3 2 1 0 0 1 0  NA
## 4 0 1 1 0 1 0  NA
## 5 1 1 1 0 1 0  NA
## 6 2 1 1 0 1 0  NA

——– Code Chank 51 ——–

USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md
## 
## Call:
## arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
## 
## Coefficients:
##          ar1      ma1    sar1     sar2     sma1
##       0.4247  -0.9180  0.0132  -0.2639  -0.7449
## s.e.  0.0770   0.0376  0.0894   0.0834   0.0753
## 
## sigma^2 estimated as 10160:  log likelihood = -1292.96,  aic = 2597.91

——– Code Chank 52 ——–

USgas_test_fc <- forecast(USgas_best_md, h = 12)

——– Code Chank 53 ——–

accuracy(USgas_test_fc, test)
##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  6.081099  97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set     42.211254 104.79281 83.09943 1.4913412 3.314280 0.7216918
##                      ACF1 Theil's U
## Training set  0.004565601        NA
## Test set     -0.049999869 0.3469228

——– Code Chank 54 ——–

test_forecast(USgas, 
              forecast.obj = USgas_test_fc,
              test = test)

——– Code Chank 55 ——–

final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))

——– Code Chank 56 ——–

checkresiduals(final_md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 30.173, df = 19, p-value = 0.04964
## 
## Model df: 5.   Total lags used: 24

——– Code Chank 57 ——–

USgas_fc <- forecast(final_md, h = 12)

——– Code Chank 58 ——–

plot_forecast(USgas_fc,
              title = "US Natural Gas Consumption - Forecast",
              Ytitle = "Billion Cubic Feet",
              Xtitle = "Year")

——– Code Chank 59 ——–

USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1
## Series: train 
## ARIMA(2,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sar2     sma1
##       0.4301  -0.0372  -0.9098  0.0118  -0.2673  -0.7431
## s.e.  0.0790   0.0752   0.0452  0.0893   0.0829   0.0751
## 
## sigma^2 = 10446:  log likelihood = -1292.83
## AIC=2599.67   AICc=2600.22   BIC=2623.2

——– Code Chank 60 ——–

USgas_auto_md2 <- auto.arima(train, 
                             max.order = 5,
                             D = 1,
                             d = 1,
                             ic = "aic",
                             stepwise = FALSE, 
                             approximation = FALSE)
USgas_auto_md2
## Series: train 
## ARIMA(1,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1    sar1     sar2     sma1
##       0.4247  -0.9180  0.0132  -0.2639  -0.7449
## s.e.  0.0770   0.0376  0.0894   0.0834   0.0753
## 
## sigma^2 = 10405:  log likelihood = -1292.96
## AIC=2597.91   AICc=2598.32   BIC=2618.08

——– Code Chank 61 ——–

df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))

df$lag12 <- dplyr::lag(df$y, n = 12)

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)

df$trend <- 1:nrow(df)

——– Code Chank 62 ——–

par <- ts_split(ts.obj = AirPassengers, sample.out = 12)

train <- par$train

test <- par$test

——– Code Chank 63 ——–

train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]

——– Code Chank 64 ——–

md1 <- tslm(train ~ season + trend + lag12, data = train_df)

——– Code Chank 65 ——–

checkresiduals(md1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 24
## 
## data:  Residuals from Linear regression model
## LM test = 83.147, df = 24, p-value = 1.903e-08

——– Code Chank 66 ——–

md2 <- auto.arima(train, 
                  xreg = cbind(model.matrix(~ month,train_df)[,-1], 
                               train_df$trend, 
                               train_df$lag12), 
                  seasonal = TRUE, 
                  stepwise = FALSE, 
                  approximation = FALSE)

——– Code Chank 67 ——–

summary(md2)
## Series: train 
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors 
## 
## Coefficients:
##          ar1     ar2     sar1     sar2  monthFeb  monthMar  monthApr  monthMay
##       0.5849  0.3056  -0.4421  -0.2063   -2.7523    0.8231    0.2066    2.8279
## s.e.  0.0897  0.0903   0.1050   0.1060    1.8417    2.3248    2.4162    2.5560
##       monthJun  monthJul  monthAug  monthSep  monthOct  monthNov  monthDec
##         6.6057   11.2337   12.1909    3.8269    0.6350   -2.2723   -0.9918
## s.e.    3.2916    4.2324    4.1198    2.9243    2.3405    2.4211    1.9172
##                     
##       0.2726  1.0244
## s.e.  0.1367  0.0426
## 
## sigma^2 = 72.82:  log likelihood = -426.93
## AIC=889.86   AICc=896.63   BIC=940.04
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
##                     ACF1
## Training set 0.005966711

——– Code Chank 68 ——–

checkresiduals(md2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
## Q* = 25.816, df = 20, p-value = 0.172
## 
## Model df: 4.   Total lags used: 24

——– Code Chank 69 ——–

fc1 <- forecast(md1, newdata = test_df)

fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1], 
                                  test_df$trend, 
                                  test_df$lag12))

——– Code Chank 70 ——–

accuracy(fc1, test)
##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -1.420883e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
## Test set      1.079611e+01 20.82782 18.57391  1.9530865 3.828115 0.5734654
##                   ACF1 Theil's U
## Training set 0.7502578        NA
## Test set     0.1653119 0.4052175
accuracy(fc2, test)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set   0.4117148  8.353629  6.397538  0.2571543 2.549682 0.2100998
## Test set     -11.2123706 17.928195 13.353910 -2.4898843 2.924174 0.4385521
##                      ACF1 Theil's U
## Training set  0.005966711        NA
## Test set     -0.325044929 0.3923795