# Chapter 11 Code

set.seed(12345)

stationary_ts <- arima.sim(model = list(order = c(1,0,0),
                                        ar = 0.5 ),
                           n = 500)

——– Code Chank 2 ——–

library(TSstudio)

ts_plot(stationary_ts, 
        title = "Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 3 ——–

set.seed(12345)

non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)

ts_plot(non_stationary_ts, 
        title = "Non-Stationary Time Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 4 ——–

data(AirPassengers)

ts_plot(AirPassengers,
        title = "Monthly Airline Passenger Numbers 1949-1960",
        Ytitle = "Thousands of Passengers",
        Xtitle = "Year")

——– Code Chank 5 ——–

ts_plot(diff(AirPassengers, lag = 1),
        title = "AirPassengers Series - First Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")

——– Code Chank 6 ——–

ts_plot(diff(diff(AirPassengers, lag = 1), 12),
        title = "AirPassengers Series - First and Seasonal Differencing",
        Xtitle = "Year",
        Ytitle = "Differencing of Thousands of Passengers")

——– Code Chank 7 ——–

ts_plot(diff(log(AirPassengers), lag = 1),
        title = "AirPassengers Series - First Differencing with Log Transformation",
        Xtitle = "Year",
        Ytitle = "Differencing/Log of Thousands of Passengers")

——– Code Chank 8 ——–

set.seed(12345)

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

p1 <- plot_ly()
p2 <- plot_ly()

for(i in 1:20){
  rm <- NULL
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}

——– Code Chank 8 ——–

set.seed(12345)

library(plotly)

p1 <- plot_ly()
p2 <- plot_ly()

for(i in 1:20){
  rm <- NULL
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}

——– Code Chank 9 ——–

p1 %>% layout(title = "Simulate Random Walk",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()

——– Code Chank 10 ——–

p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
              yaxis = list(title = "Value"),
              xaxis = list(title = "Index")) %>%
  hide_legend()

——– Code Chank 11 ——–

set.seed(12345)

ar2 <- arima.sim(model = list(order = c(2,0,0),
                              ar = c(0.9, -0.3)),
                 n = 500)

——– Code Chank 12 ——–

ts_plot(ar2, 
        title = "Simulate AR(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 13 ——–

md_ar <- ar(ar2)
md_ar

## 
## Call:
## ar(x = ar2)
## 
## Coefficients:
##       1        2  
##  0.8851  -0.2900  
## 
## Order selected 2  sigma^2 estimated as  1.049

——– Code Chank 14 ——–

par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)

——– Code Chank 15 ——–

set.seed(12345)

ma2 <- arima.sim(model = list(order = c(0, 0, 2),
                              ma = c(0.5, -0.3)),
                 n = 500)

——– Code Chank 16 ——–

ts_plot(ma2, 
        title = "Simulate MA(2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 17 ——–

md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma

## 
## Call:
## arima(x = ma2, order = c(0, 0, 2), method = "ML")
## 
## Coefficients:
##         ma1      ma2  intercept
##       0.530  -0.3454     0.0875
## s.e.  0.041   0.0406     0.0525
## 
## sigma^2 estimated as 0.9829:  log likelihood = -705.81,  aic = 1419.62

——– Code Chank 18 ——–

par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)

# ——– Code Chank 19 ——–

set.seed(12345)

arma <- arima.sim(model = list(order(1,0,2), ar = c(0.7), ma = c(0.5,-0.3)), n = 500)

——– Code Chank 20 ——–

ts_plot(arma, 
        title = "Simulate ARMA(1,2) Series",
        Ytitle = "Value",
        Xtitle = "Index")

——– Code Chank 21 ——–

arma_md <- arima(arma, order = c(1,0,2))
arma_md

## 
## Call:
## arima(x = arma, order = c(1, 0, 2))
## 
## Coefficients:
##          ar1     ma1      ma2  intercept
##       0.7439  0.4785  -0.3954     0.2853
## s.e.  0.0657  0.0878   0.0849     0.1891
## 
## sigma^2 estimated as 1.01:  log likelihood = -713.18,  aic = 1436.36

——– Code Chank 22 ——–

par(mfrow=c(1,2))
acf(arma)
pacf(arma)

# ——– Code Chank 23 ——–

arima(arma, order = c(1, 0, 1), include.mean = FALSE)

## 
## Call:
## arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ma1
##       0.4144  0.8864
## s.e.  0.0432  0.0248
## 
## sigma^2 estimated as 1.051:  log likelihood = -723,  aic = 1452

——– Code Chank 24 ——–

arima(arma, order = c(2, 0, 1), include.mean = FALSE)

## 
## Call:
## arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ar2     ma1
##       0.3136  0.1918  0.9227
## s.e.  0.0486  0.0484  0.0183
## 
## sigma^2 estimated as 1.019:  log likelihood = -715.26,  aic = 1438.52

——– Code Chank 25 ——–

arima(arma, order = c(1, 0, 2), include.mean = FALSE)

## 
## Call:
## arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ma1      ma2
##       0.7602  0.4654  -0.4079
## s.e.  0.0626  0.0858   0.0832
## 
## sigma^2 estimated as 1.014:  log likelihood = -714.27,  aic = 1436.54

——– Code Chank 26 ——–

arima(arma, order = c(2, 0, 2), include.mean = FALSE)

## 
## Call:
## arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
## 
## Coefficients:
##          ar1     ar2     ma1      ma2
##       0.7239  0.0194  0.4997  -0.3783
## s.e.  0.2458  0.1257  0.2427   0.2134
## 
## sigma^2 estimated as 1.014:  log likelihood = -714.26,  aic = 1438.51

——– Code Chank 27 ——–

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)

checkresiduals(best_arma)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,2) with zero mean
## Q* = 5.3129, df = 7, p-value = 0.6218
## 
## Model df: 3.   Total lags used: 10

——– Code Chank 28 ——–

ar_fc <- forecast(md_ar, h = 100)

——– Code Chank 29 ——–

plot_forecast(ar_fc, 
              title = "Forecast AR(2) Model",
              Ytitle = "Value",
              Xtitle = "Year")

——– Code Chank 30 ——–

data(Coffee_Prices)

robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))

——– Code Chank 31 ——–

ts_plot(robusta_price,
        title = "The Robusta Coffee Monthly Prices",
        Ytitle = "Price in USD",
        Xtitle = "Year")

——– Code Chank 32 ——–

acf(robusta_price)

# ——– Code Chank 33 ——–

robusta_price_d1 <- diff(robusta_price)

——– Code Chank 34 ——–

par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)

# ——– Code Chank 35 ——–

robusta_md <- arima(robusta_price, order = c(1, 1, 0))

——– Code Chank 36 ——–

summary(robusta_md)

## 
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
## 
## Coefficients:
##          ar1
##       0.2780
## s.e.  0.0647
## 
## sigma^2 estimated as 0.007142:  log likelihood = 231.38,  aic = -458.76
## 
## Training set error measures:
##                       ME       RMSE        MAE        MPE     MAPE     MASE
## Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
##                     ACF1
## Training set 0.001526295

——– Code Chank 37 ——–

checkresiduals(robusta_md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 26.896, df = 23, p-value = 0.2604
## 
## Model df: 1.   Total lags used: 24

——– Code Chank 38 ——–

data(USgas)

——– Code Chank 39 ——–

ts_plot(USgas,
        title = "US Monthly Natural Gas consumption", 
        Ytitle = "Billion Cubic Feet",
        Xtitle = "Year")

——– Code Chank 40 ——–

USgas_split <- ts_split(USgas, sample.out = 12)

train <- USgas_split$train
test <- USgas_split$test

——– Code Chank 41 ——–

par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)

# ——– Code Chank 42 ——–

USgas_d12 <- diff(train, 12)

ts_plot(USgas_d12,
        title = "US Monthly Natural Gas consumption - First Seasonal Difference", 
        Ytitle = "Billion Cubic Feet (First Difference)",
        Xtitle = "Year")

——– Code Chank 43 ——–

USgas_d12_1 <- diff(diff(USgas_d12, 1))

ts_plot(USgas_d12_1,
        title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing", 
        Ytitle = "Billion Cubic Feet (Difference)",
        Xtitle = "Year")

——– Code Chank 44 ——–

par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)

# ——– Code Chank 45 ——–

p <- q <- P <- Q <- 0:2

——– Code Chank 46 ——–

arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1

——– Code Chank 47 ——–

arima_grid$k <- rowSums(arima_grid)

——– Code Chank 48 ——–

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

arima_grid <- arima_grid %>% filter(k <= 7)

——– Code Chank 49 ——–

library(dplyr)

arima_grid <- arima_grid %>% filter(k <= 7)
# -------- Code Chank 49 --------
arima_search <- lapply(1:nrow(arima_grid), function(i){
  md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]),
              seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])),
              method = "CSS")  # Cambiar el método de optimización si es necesario
  results <- data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
                        P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
                        AIC = md$aic)
}) %>% 
  bind_rows() %>% 
  arrange(AIC)

——– Code Chank 50 ——–

head(arima_search)

##   p d q P D Q AIC
## 1 0 1 0 0 1 0  NA
## 2 1 1 0 0 1 0  NA
## 3 2 1 0 0 1 0  NA
## 4 0 1 1 0 1 0  NA
## 5 1 1 1 0 1 0  NA
## 6 2 1 1 0 1 0  NA

——– Code Chank 51 ——–

USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md

## 
## Call:
## arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
## 
## Coefficients:
##          ar1      ma1    sar1     sar2     sma1
##       0.4247  -0.9180  0.0132  -0.2639  -0.7449
## s.e.  0.0770   0.0376  0.0894   0.0834   0.0753
## 
## sigma^2 estimated as 10160:  log likelihood = -1292.96,  aic = 2597.91

——– Code Chank 52 ——–

USgas_test_fc <- forecast(USgas_best_md, h = 12)

——– Code Chank 53 ——–

accuracy(USgas_test_fc, test)

##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  6.081099  97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set     42.211254 104.79281 83.09943 1.4913412 3.314280 0.7216918
##                      ACF1 Theil's U
## Training set  0.004565601        NA
## Test set     -0.049999869 0.3469228

——– Code Chank 54 ——–

test_forecast(USgas, 
              forecast.obj = USgas_test_fc,
              test = test)

——– Code Chank 55 ——–

final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))

——– Code Chank 56 ——–

checkresiduals(final_md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 30.173, df = 19, p-value = 0.04964
## 
## Model df: 5.   Total lags used: 24

——– Code Chank 57 ——–

USgas_fc <- forecast(final_md, h = 12)

——– Code Chank 58 ——–

plot_forecast(USgas_fc,
              title = "US Natural Gas Consumption - Forecast",
              Ytitle = "Billion Cubic Feet",
              Xtitle = "Year")

——– Code Chank 59 ——–

USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1

## Series: train 
## ARIMA(2,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sar2     sma1
##       0.4301  -0.0372  -0.9098  0.0118  -0.2673  -0.7431
## s.e.  0.0790   0.0752   0.0452  0.0893   0.0829   0.0751
## 
## sigma^2 = 10446:  log likelihood = -1292.83
## AIC=2599.67   AICc=2600.22   BIC=2623.2

——– Code Chank 60 ——–

USgas_auto_md2 <- auto.arima(train, 
                             max.order = 5,
                             D = 1,
                             d = 1,
                             ic = "aic",
                             stepwise = FALSE, 
                             approximation = FALSE)
USgas_auto_md2

## Series: train 
## ARIMA(1,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1    sar1     sar2     sma1
##       0.4247  -0.9180  0.0132  -0.2639  -0.7449
## s.e.  0.0770   0.0376  0.0894   0.0834   0.0753
## 
## sigma^2 = 10405:  log likelihood = -1292.96
## AIC=2597.91   AICc=2598.32   BIC=2618.08

——– Code Chank 61 ——–

df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))

df$lag12 <- dplyr::lag(df$y, n = 12)

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)

df$trend <- 1:nrow(df)

——– Code Chank 62 ——–

par <- ts_split(ts.obj = AirPassengers, sample.out = 12)

train <- par$train

test <- par$test

——– Code Chank 63 ——–

train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]

——– Code Chank 64 ——–

md1 <- tslm(train ~ season + trend + lag12, data = train_df)

——– Code Chank 65 ——–

checkresiduals(md1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 24
## 
## data:  Residuals from Linear regression model
## LM test = 83.147, df = 24, p-value = 1.903e-08

——– Code Chank 66 ——–

md2 <- auto.arima(train, 
                  xreg = cbind(model.matrix(~ month,train_df)[,-1], 
                               train_df$trend, 
                               train_df$lag12), 
                  seasonal = TRUE, 
                  stepwise = FALSE, 
                  approximation = FALSE)

——– Code Chank 67 ——–

summary(md2)

## Series: train 
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors 
## 
## Coefficients:
##          ar1     ar2     sar1     sar2  monthFeb  monthMar  monthApr  monthMay
##       0.5849  0.3056  -0.4421  -0.2063   -2.7523    0.8231    0.2066    2.8279
## s.e.  0.0897  0.0903   0.1050   0.1060    1.8417    2.3248    2.4162    2.5560
##       monthJun  monthJul  monthAug  monthSep  monthOct  monthNov  monthDec
##         6.6057   11.2337   12.1909    3.8269    0.6350   -2.2723   -0.9918
## s.e.    3.2916    4.2324    4.1198    2.9243    2.3405    2.4211    1.9172
##                     
##       0.2726  1.0244
## s.e.  0.1367  0.0426
## 
## sigma^2 = 72.82:  log likelihood = -426.93
## AIC=889.86   AICc=896.63   BIC=940.04
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
##                     ACF1
## Training set 0.005966711

——– Code Chank 68 ——–

checkresiduals(md2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
## Q* = 25.816, df = 20, p-value = 0.172
## 
## Model df: 4.   Total lags used: 24

——– Code Chank 69 ——–

fc1 <- forecast(md1, newdata = test_df)

fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1], 
                                  test_df$trend, 
                                  test_df$lag12))

——– Code Chank 70 ——–

accuracy(fc1, test)

##                         ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -1.420883e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
## Test set      1.079611e+01 20.82782 18.57391  1.9530865 3.828115 0.5734654
##                   ACF1 Theil's U
## Training set 0.7502578        NA
## Test set     0.1653119 0.4052175

accuracy(fc2, test)

##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set   0.4117148  8.353629  6.397538  0.2571543 2.549682 0.2100998
## Test set     -11.2123706 17.928195 13.353910 -2.4898843 2.924174 0.4385521
##                      ACF1 Theil's U
## Training set  0.005966711        NA
## Test set     -0.325044929 0.3923795

Informe 3

Fredys Marmolejo

2024-05-03

——– Code Chank 2 ——–

——– Code Chank 3 ——–

——– Code Chank 4 ——–

——– Code Chank 5 ——–

——– Code Chank 6 ——–

——– Code Chank 7 ——–

——– Code Chank 8 ——–

——– Code Chank 8 ——–

——– Code Chank 9 ——–

——– Code Chank 10 ——–

——– Code Chank 11 ——–

——– Code Chank 12 ——–

——– Code Chank 13 ——–

——– Code Chank 14 ——–

——– Code Chank 15 ——–

——– Code Chank 16 ——–

——– Code Chank 17 ——–

——– Code Chank 18 ——–

——– Code Chank 20 ——–

——– Code Chank 21 ——–

——– Code Chank 22 ——–

——– Code Chank 24 ——–

——– Code Chank 25 ——–

——– Code Chank 26 ——–

——– Code Chank 27 ——–

——– Code Chank 28 ——–

——– Code Chank 29 ——–

——– Code Chank 30 ——–

——– Code Chank 31 ——–

——– Code Chank 32 ——–

——– Code Chank 34 ——–

——– Code Chank 36 ——–

——– Code Chank 37 ——–

——– Code Chank 38 ——–

——– Code Chank 39 ——–

——– Code Chank 40 ——–

——– Code Chank 41 ——–

——– Code Chank 43 ——–

——– Code Chank 44 ——–

——– Code Chank 46 ——–

——– Code Chank 47 ——–

——– Code Chank 48 ——–

——– Code Chank 49 ——–

——– Code Chank 50 ——–

——– Code Chank 51 ——–

——– Code Chank 52 ——–

——– Code Chank 53 ——–

——– Code Chank 54 ——–

——– Code Chank 55 ——–

——– Code Chank 56 ——–

——– Code Chank 57 ——–

——– Code Chank 58 ——–

——– Code Chank 59 ——–

——– Code Chank 60 ——–

——– Code Chank 61 ——–

——– Code Chank 62 ——–

——– Code Chank 63 ——–

——– Code Chank 64 ——–

——– Code Chank 65 ——–

——– Code Chank 66 ——–

——– Code Chank 67 ——–

——– Code Chank 68 ——–

——– Code Chank 69 ——–

——– Code Chank 70 ——–