Modelo Arima

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(tseries)

## Warning: package 'tseries' was built under R version 4.3.3

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.3.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(zoo)

## Warning: package 'zoo' was built under R version 4.3.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(xts)

## Warning: package 'xts' was built under R version 4.3.3

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

#Primero cargaremos los datos a través del archivo de Excel y transformar la columna Mes en formato Date

library(readxl)
Base_de_Datos_18_23_Calificadora_de_Riesgo_y_Rentabilidad_ <- read_excel("F:/Master Big Data/TFM/Base de Datos 18-23 (Calificadora de Riesgo y Rentabilidad).xlsx")

Datos <- Base_de_Datos_18_23_Calificadora_de_Riesgo_y_Rentabilidad_

#Debido a que los datos en el Excel están en porcentaje, escalaremos los valores multiplicandolos por 100

datos_porcentajes <- Datos %>%
  mutate_at(vars(-Mes, -AFP), function(x) as.numeric(x) * 100)

print(datos_porcentajes)

## # A tibble: 504 × 10
##    Mes                 AFP    Rentabilidad `C-1`  `C-2` `C-3`   AAA    AA      A
##    <dttm>              <chr>         <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
##  1 2023-12-01 00:00:00 ROMANA         7.31 18.7  0      0      80.8  0     0.529
##  2 2023-12-01 00:00:00 RESER…         8.94  4.33 0.0492 0      72.2  8.27  4.77 
##  3 2023-12-01 00:00:00 POPUL…         9.58  2.97 0.685  0      71.5  8.45  4.53 
##  4 2023-12-01 00:00:00 SIEMB…         8.32  2.90 0.545  0      76.2  9.74  4.58 
##  5 2023-12-01 00:00:00 CRECER         7.81  1.90 0      0      70.0 18.3   2.76 
##  6 2023-12-01 00:00:00 JMMB-…         9.94  1.29 5.29   0      71.6  1.60 16.3  
##  7 2023-12-01 00:00:00 ATLÁN…         7.72  0    0.987  0.570  76.6  2.90 12.9  
##  8 2023-11-01 00:00:00 ROMANA         7.35 19.3  0      0      80.2  0     0.528
##  9 2023-11-01 00:00:00 RESER…         9.05  3.12 0.0715 0      75.9  6.95  4.63 
## 10 2023-11-01 00:00:00 SIEMB…         8.95  2.90 0.505  0      77.2  8.85  4.63 
## # ℹ 494 more rows
## # ℹ 1 more variable: BBB <dbl>

#Para la trasformación de los datos a series temporales, debemos separar los fondos del data set principal de datos, unicamente trabajaremos con la Rentabilidad, por lo que separaremos de cada fondo el Mes y la Rentabilidad.

datos_atlantico <- subset(datos_porcentajes, AFP == "ATLÁNTICO", select = c(Mes, Rentabilidad))
datos_crecer <- subset(datos_porcentajes, AFP == "CRECER", select = c(Mes, Rentabilidad))
datos_Jmmb <- subset(datos_porcentajes, AFP == "JMMB- BDI", select = c(Mes, Rentabilidad))
datos_popular <- subset(datos_porcentajes, AFP == "POPULAR", select = c(Mes, Rentabilidad))
datos_reservas <- subset(datos_porcentajes, AFP == "RESERVAS", select = c(Mes, Rentabilidad))
datos_romana <- subset(datos_porcentajes, AFP == "ROMANA", select = c(Mes, Rentabilidad))
datos_siembra <- subset(datos_porcentajes, AFP == "SIEMBRA", select = c(Mes, Rentabilidad))

#Para crear las series temporales, se crearan columna por columna las series y luego se combinarán

# Obtener la fecha mínima y máxima de la columna 'Mes'
fecha_minima <- min(Datos$Mes)
fecha_maxima <- max(Datos$Mes)

# Crear las series temporales para cada columna
ATLANTICO <- ts(datos_atlantico$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

SIEMBRA <- ts(datos_siembra$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

CRECER <- ts(datos_crecer$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

ROMANA <- ts(datos_romana$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

RESERVAS <- ts(datos_reservas$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

POPULAR <- ts(datos_popular$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

JMMB_BDI <- ts(datos_Jmmb$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)

#Combinando las series en un mismo Data set, para fines de graficar las series temporales

Datos_Serie_temporal <- cbind(ATLANTICO,CRECER,JMMB_BDI,POPULAR,RESERVAS,ROMANA,SIEMBRA)
plot(Datos_Serie_temporal)

#Ahora estadisticamente veremos si estas series son estacionarias o no estacionarias con la prueba Dickey-Fuller

apply_adf_tests <- function(series_list, series_names) {
  if (length(series_list) != length(series_names)) {
    stop("La longitud de series_list y series_names debe ser la misma")
  }

  results <- list()

  for (i in 1:length(series_list)) {
    series <- series_list[[i]]
    name <- series_names[i]
    test_result <- adf.test(series)
    results[[name]] <- test_result
    cat("Prueba Dickey-Fuller para", name, "\n")
    print(test_result)
    if (test_result$p.value < 0.05) {
      cat(name, "es estacionaria (se rechaza la hipótesis nula)\n")
    } else {
      cat(name, "es no estacionaria (no se rechaza la hipótesis nula)\n")
    }
    cat("\n")
  }

  return(results)
}

series_list <- list(ATLANTICO,CRECER,SIEMBRA,ROMANA,RESERVAS,POPULAR,JMMB_BDI)
series_names <- c("ATLANTICO", "CRECER","SIEMBRA","ROMANA","RESERVAS","POPULAR","JMMB_BDI")

Prueba_Dickey_Fuller <- apply_adf_tests(series_list, series_names)

## Prueba Dickey-Fuller para ATLANTICO 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -2.1987, Lag order = 4, p-value = 0.4944
## alternative hypothesis: stationary
## 
## ATLANTICO es no estacionaria (no se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para CRECER 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -1.8888, Lag order = 4, p-value = 0.6206
## alternative hypothesis: stationary
## 
## CRECER es no estacionaria (no se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para SIEMBRA 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -2.642, Lag order = 4, p-value = 0.3139
## alternative hypothesis: stationary
## 
## SIEMBRA es no estacionaria (no se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para ROMANA 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -3.549, Lag order = 4, p-value = 0.044
## alternative hypothesis: stationary
## 
## ROMANA es estacionaria (se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para RESERVAS 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -2.7278, Lag order = 4, p-value = 0.279
## alternative hypothesis: stationary
## 
## RESERVAS es no estacionaria (no se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para POPULAR 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -2.5428, Lag order = 4, p-value = 0.3543
## alternative hypothesis: stationary
## 
## POPULAR es no estacionaria (no se rechaza la hipótesis nula)
## 
## Prueba Dickey-Fuller para JMMB_BDI 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  series
## Dickey-Fuller = -2.5589, Lag order = 4, p-value = 0.3478
## alternative hypothesis: stationary
## 
## JMMB_BDI es no estacionaria (no se rechaza la hipótesis nula)

#Vamos a empezar con todos los fondos no estacionarios a modo experimental, vimos en la prueba Dickey-Fuller que el unico fondo que es estacionario es ROMANA, por lo tanto deberemos diferenciar los otros fondos para que el P-valor esté por debajo de su nivel de significancia 0.05. Primero veremos cuales son las diferenciaciones necesarias para llevar el P-valor por debajo de 0.05. Usaremos la función "diff" dentro de la función "adf.test" para confirmar cuanta diferenciación necesitamos para estas series. 

adf.test(diff(ATLANTICO,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ATLANTICO, differences = 1)
## Dickey-Fuller = -3.0241, Lag order = 4, p-value = 0.1586
## alternative hypothesis: stationary

adf.test(diff(JMMB_BDI,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(JMMB_BDI, differences = 1)
## Dickey-Fuller = -2.4993, Lag order = 4, p-value = 0.3721
## alternative hypothesis: stationary

adf.test(diff(SIEMBRA,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(SIEMBRA, differences = 1)
## Dickey-Fuller = -2.6051, Lag order = 4, p-value = 0.329
## alternative hypothesis: stationary

adf.test(diff(POPULAR,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(POPULAR, differences = 1)
## Dickey-Fuller = -2.6336, Lag order = 4, p-value = 0.3174
## alternative hypothesis: stationary

adf.test(diff(RESERVAS,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(RESERVAS, differences = 1)
## Dickey-Fuller = -3.2155, Lag order = 4, p-value = 0.09238
## alternative hypothesis: stationary

adf.test(diff(CRECER,differences = 1))

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(CRECER, differences = 1)
## Dickey-Fuller = -3.0842, Lag order = 4, p-value = 0.1341
## alternative hypothesis: stationary

#Con una unica diferenciación no logramos una serie estacionaria, por lo que haremos una segunda.

adf.test(diff(ATLANTICO,differences = 2))

## Warning in adf.test(diff(ATLANTICO, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ATLANTICO, differences = 2)
## Dickey-Fuller = -5.26, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff(JMMB_BDI,differences = 2))

## Warning in adf.test(diff(JMMB_BDI, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(JMMB_BDI, differences = 2)
## Dickey-Fuller = -4.6012, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff(SIEMBRA,differences = 2))

## Warning in adf.test(diff(SIEMBRA, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(SIEMBRA, differences = 2)
## Dickey-Fuller = -5.5321, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff(POPULAR,differences = 2))

## Warning in adf.test(diff(POPULAR, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(POPULAR, differences = 2)
## Dickey-Fuller = -5.5666, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff(RESERVAS,differences = 2))

## Warning in adf.test(diff(RESERVAS, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(RESERVAS, differences = 2)
## Dickey-Fuller = -5.0794, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

adf.test(diff(CRECER,differences = 2))

## Warning in adf.test(diff(CRECER, differences = 2)): p-value smaller than
## printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(CRECER, differences = 2)
## Dickey-Fuller = -5.8095, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

#Vemos que con una segunda difenciación si obtenemos una serie estacionaria, esto nos servirá de referencia para el modelo ARIMA que entrenaremos más adelante. Ahora vamos a crear el primer modelo con el fondo ATLANTICO, primero vamos a dividir los datos de entrenamiento y prueba.

trainAtl <- window(ATLANTICO, start = c(2018, 1), end = c(2022, 12))
testAtl <- window(ATLANTICO, start = c(2023, 1))
plot.ts(trainAtl)

#Como vimos anteriormente sobre las diferenciación, todos los fondos a diferencia de ROMANA son estacionarios con dos diferenciaciones, por lo que usaremos la función "auto.arima" para que nos asigne el mejor modelo con dos diferenciaciones, ademas habilitaremos el "trace" para ver cuales otros modelos evaluó y descartó. Pero primero veremso que otro modelo nos puede asignar directamente.

modeloAlt <- auto.arima(trainAtl, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 122.5438
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 98.68132
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)                               : 120.4348
##  ARIMA(1,1,0)            with drift         : 117.8188
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 100.8998
##  ARIMA(2,1,0)(1,0,0)[12] with drift         : 98.1436
##  ARIMA(2,1,0)            with drift         : 117.7558
##  ARIMA(2,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(2,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(3,1,0)(1,0,0)[12] with drift         : 99.63696
##  ARIMA(2,1,1)(1,0,0)[12] with drift         : 99.98992
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 97.69789
##  ARIMA(1,1,1)            with drift         : 118.1513
##  ARIMA(1,1,1)(1,0,1)[12] with drift         : Inf
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 100.0488
##  ARIMA(1,1,2)(1,0,0)[12] with drift         : 99.92283
##  ARIMA(0,1,2)(1,0,0)[12] with drift         : 99.78638
##  ARIMA(2,1,2)(1,0,0)[12] with drift         : Inf
##  ARIMA(1,1,1)(1,0,0)[12]                    : 95.30658
##  ARIMA(1,1,1)                               : 115.8629
##  ARIMA(1,1,1)(1,0,1)[12]                    : Inf
##  ARIMA(1,1,1)(0,0,1)[12]                    : Inf
##  ARIMA(0,1,1)(1,0,0)[12]                    : 97.77189
##  ARIMA(1,1,0)(1,0,0)[12]                    : 96.39298
##  ARIMA(2,1,1)(1,0,0)[12]                    : 97.50663
##  ARIMA(1,1,2)(1,0,0)[12]                    : 97.44044
##  ARIMA(0,1,0)(1,0,0)[12]                    : 98.72183
##  ARIMA(0,1,2)(1,0,0)[12]                    : 97.40394
##  ARIMA(2,1,0)(1,0,0)[12]                    : 95.75386
##  ARIMA(2,1,2)(1,0,0)[12]                    : Inf
## 
##  Best model: ARIMA(1,1,1)(1,0,0)[12]

modAlt <- forecast(modeloAlt, h=12, level = 0)

checkresiduals(modeloAlt)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(1,0,0)[12]
## Q* = 13.514, df = 9, p-value = 0.1407
## 
## Model df: 3.   Total lags used: 12

autoplot(modAlt)

plot(modAlt)
lines(testAtl, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

rmseAtl <- sqrt(mean((modAlt$mean - testAtl)^2))
maeAtl <- mean(abs(modAlt$mean - testAtl))
mapeAtl <- mean(abs((modAlt$mean - testAtl) / testAtl)) * 100

cat("RMSE:", rmseAtl, "\n")

## RMSE: 3.475409

cat("MAE:", maeAtl, "\n")

## MAE: 3.236391

cat("MAPE:", mapeAtl, "%\n")

## MAPE: 30.14065 %

#El primer modelo generado automanticamente por auto.arima fue sin diferenciación y nos dió un MAPE del 30.14%, un RMSE de 3.47 y un MAE de 3.23, valores muy alto, vamos a probar con las dos difereciaciones

modeloAlt2 <- auto.arima(trainAtl, d = 2, trace = TRUE)

## 
##  ARIMA(2,2,2)(1,0,1)[12]                    : Inf
##  ARIMA(0,2,0)                               : 135.8397
##  ARIMA(1,2,0)(1,0,0)[12]                    : 104.8751
##  ARIMA(0,2,1)(0,0,1)[12]                    : Inf
##  ARIMA(1,2,0)                               : 123.8771
##  ARIMA(1,2,0)(1,0,1)[12]                    : Inf
##  ARIMA(1,2,0)(0,0,1)[12]                    : Inf
##  ARIMA(0,2,0)(1,0,0)[12]                    : 119.5128
##  ARIMA(2,2,0)(1,0,0)[12]                    : 102.1742
##  ARIMA(2,2,0)                               : 122.8175
##  ARIMA(2,2,0)(1,0,1)[12]                    : Inf
##  ARIMA(2,2,0)(0,0,1)[12]                    : Inf
##  ARIMA(3,2,0)(1,0,0)[12]                    : 104.421
##  ARIMA(2,2,1)(1,0,0)[12]                    : 100.3938
##  ARIMA(2,2,1)                               : Inf
##  ARIMA(2,2,1)(1,0,1)[12]                    : Inf
##  ARIMA(2,2,1)(0,0,1)[12]                    : Inf
##  ARIMA(1,2,1)(1,0,0)[12]                    : 98.56995
##  ARIMA(1,2,1)                               : 120.1677
##  ARIMA(1,2,1)(1,0,1)[12]                    : Inf
##  ARIMA(1,2,1)(0,0,1)[12]                    : Inf
##  ARIMA(0,2,1)(1,0,0)[12]                    : 96.27943
##  ARIMA(0,2,1)                               : 118.8741
##  ARIMA(0,2,1)(1,0,1)[12]                    : Inf
##  ARIMA(0,2,2)(1,0,0)[12]                    : 98.57438
##  ARIMA(1,2,2)(1,0,0)[12]                    : Inf
## 
##  Best model: ARIMA(0,2,1)(1,0,0)[12]

modAlt2 <- forecast(modeloAlt2, h=12, level = 95)

autoplot(modAlt2)

checkresiduals(modAlt2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,1)(1,0,0)[12]
## Q* = 14.687, df = 10, p-value = 0.1439
## 
## Model df: 2.   Total lags used: 12

rmseAlt2 <- sqrt(mean((modAlt2$mean - testAtl)^2))
maeAlt2 <- mean(abs(modAlt2$mean - testAtl))
mapeAlt2 <- mean(abs((modAlt2$mean - testAtl) / testAtl)) * 100

cat("RMSE:", rmseAlt2, "\n")

## RMSE: 4.757432

cat("MAE:", maeAlt2, "\n")

## MAE: 4.362603

cat("MAPE:", mapeAlt2, "%\n")

## MAPE: 40.28874 %

#Vemos que el primer modelo con auto.arima sin ajustar diretamente las diferenciaciones dió mejor resultado que con las diferenciaciones ajustadas, por lo que nos quedaremos para el resto de los fondos con un modelo ajustado automaticamente con auto.arima.

plot(modAlt)
lines(testAtl, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO CRECER

trainCre <- window(CRECER, start = c(2018, 1), end = c(2022, 12))
testCre <- window(CRECER, start = c(2023, 1))
plot.ts(trainCre)

modeloCre <- auto.arima(trainCre, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 130.128
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 106.2605
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)                               : 127.9841
##  ARIMA(1,1,0)            with drift         : 129.1991
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 106.0099
##  ARIMA(0,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 106.9135
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 106.3186
##  ARIMA(0,1,0)(1,0,0)[12]                    : 104.4092
##  ARIMA(0,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(0,1,0)(0,0,1)[12]                    : Inf
##  ARIMA(1,1,0)(1,0,0)[12]                    : 104.3701
##  ARIMA(1,1,0)                               : 126.9788
##  ARIMA(1,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(1,1,0)(0,0,1)[12]                    : Inf
##  ARIMA(2,1,0)(1,0,0)[12]                    : 103.3904
##  ARIMA(2,1,0)                               : 126.2507
##  ARIMA(2,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(2,1,0)(0,0,1)[12]                    : Inf
##  ARIMA(3,1,0)(1,0,0)[12]                    : 105.6114
##  ARIMA(2,1,1)(1,0,0)[12]                    : 105.6722
##  ARIMA(1,1,1)(1,0,0)[12]                    : 104.1323
##  ARIMA(3,1,1)(1,0,0)[12]                    : 108.1076
##  ARIMA(2,1,0)(1,0,0)[12] with drift         : 105.5003
## 
##  Best model: ARIMA(2,1,0)(1,0,0)[12]

modCre <- forecast(modeloCre, h=12, level = 0)

autoplot(modCre)

checkresiduals(modCre)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[12]
## Q* = 10.435, df = 9, p-value = 0.3164
## 
## Model df: 3.   Total lags used: 12

rmseCre <- sqrt(mean((modCre$mean - testCre)^2))
maeCre <- mean(abs(modCre$mean - testCre))
mapeCre <- mean(abs((modCre$mean - testCre) / testCre)) * 100

cat("RMSE:", rmseCre, "\n")

## RMSE: 1.091973

cat("MAE:", maeCre, "\n")

## MAE: 1.01794

cat("MAPE:", mapeCre, "%\n")

## MAPE: 10.38951 %

plot(modCre)
lines(testCre, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO JMMB_BDI

trainJmmb <- window(JMMB_BDI, start = c(2018, 1), end = c(2022, 12))
testJmmb <- window(JMMB_BDI, start = c(2023, 1))
plot.ts(trainJmmb)

modeloJmmb <- auto.arima(trainJmmb, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 72.32021
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 39.87217
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)                               : 70.22339
##  ARIMA(1,1,0)            with drift         : 63.31022
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : 41.10013
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 42.46818
##  ARIMA(2,1,0)(1,0,0)[12] with drift         : 39.9156
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 38.40684
##  ARIMA(1,1,1)            with drift         : 63.9268
##  ARIMA(1,1,1)(1,0,1)[12] with drift         : 39.83035
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 41.17477
##  ARIMA(2,1,1)(1,0,0)[12] with drift         : 40.60852
##  ARIMA(1,1,2)(1,0,0)[12] with drift         : 40.51885
##  ARIMA(0,1,2)(1,0,0)[12] with drift         : 42.12224
##  ARIMA(2,1,2)(1,0,0)[12] with drift         : Inf
##  ARIMA(1,1,1)(1,0,0)[12]                    : 36.08409
##  ARIMA(1,1,1)                               : 61.63371
##  ARIMA(1,1,1)(1,0,1)[12]                    : 37.42773
##  ARIMA(1,1,1)(0,0,1)[12]                    : Inf
##  ARIMA(0,1,1)(1,0,0)[12]                    : 39.28725
##  ARIMA(1,1,0)(1,0,0)[12]                    : 37.87979
##  ARIMA(2,1,1)(1,0,0)[12]                    : 38.19622
##  ARIMA(1,1,2)(1,0,0)[12]                    : 38.11168
##  ARIMA(0,1,0)(1,0,0)[12]                    : 40.84158
##  ARIMA(0,1,2)(1,0,0)[12]                    : 40.05364
##  ARIMA(2,1,0)(1,0,0)[12]                    : 37.7051
##  ARIMA(2,1,2)(1,0,0)[12]                    : Inf
## 
##  Best model: ARIMA(1,1,1)(1,0,0)[12]

modJmmb <- forecast(modeloJmmb, h=12, level = 0)

autoplot(modJmmb)

checkresiduals(modJmmb)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(1,0,0)[12]
## Q* = 4.3773, df = 9, p-value = 0.8849
## 
## Model df: 3.   Total lags used: 12

rmseJmmb <- sqrt(mean((modJmmb$mean - testJmmb)^2))
maeJmmb <- mean(abs(modJmmb$mean - testJmmb))
mapeJmmb <- mean(abs((modJmmb$mean - testJmmb) / testJmmb)) * 100

cat("RMSE:", rmseJmmb, "\n")

## RMSE: 0.7750032

cat("MAE:", maeJmmb, "\n")

## MAE: 0.6932141

cat("MAPE:", mapeJmmb, "%\n")

## MAPE: 7.421734 %

plot(modJmmb)
lines(testJmmb, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO POPULAR
trainPop <- window(POPULAR, start = c(2018, 1), end = c(2022, 12))
testPop <- window(POPULAR, start = c(2023, 1))
plot.ts(trainPop)

modeloPop <- auto.arima(trainPop, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 118.983
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 104.6404
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : 100.8319
##  ARIMA(0,1,0)                               : 116.9803
##  ARIMA(0,1,1)            with drift         : 119.4885
##  ARIMA(0,1,1)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 104.8956
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : 99.81431
##  ARIMA(0,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 103.3723
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : 100.4709
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : 100.7629
##  ARIMA(0,1,0)(0,0,1)[12]                    : 97.75238
##  ARIMA(0,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(0,1,0)(1,0,0)[12]                    : 101.2267
##  ARIMA(1,1,0)(0,0,1)[12]                    : 98.24831
##  ARIMA(0,1,1)(0,0,1)[12]                    : 98.63106
##  ARIMA(1,1,1)(0,0,1)[12]                    : 98.37231
## 
##  Best model: ARIMA(0,1,0)(0,0,1)[12]

modPop <- forecast(modeloPop, h=12, level = 0)

autoplot(modPop)

checkresiduals(modPop)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)(0,0,1)[12]
## Q* = 8.3003, df = 11, p-value = 0.6862
## 
## Model df: 1.   Total lags used: 12

rmsePop <- sqrt(mean((modPop$mean - testPop)^2))
maePop <- mean(abs(modPop$mean - testPop))
mapePop <- mean(abs((modPop$mean - testPop) / testPop)) * 100

cat("RMSE:", rmsePop, "\n")

## RMSE: 1.247742

cat("MAE:", maePop, "\n")

## MAE: 1.178774

cat("MAPE:", mapePop, "%\n")

## MAPE: 11.76364 %

plot(modPop, main = "Modelo Arima Popular")
lines(testPop, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO RESERVAS
trainRes <- window(RESERVAS, start = c(2018, 1), end = c(2022, 12))
testRes <- window(RESERVAS, start = c(2023, 1))
plot.ts(trainRes)

modeloRes <- auto.arima(trainRes, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : 91.33855
##  ARIMA(0,1,0)            with drift         : 109.6497
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 83.71072
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : 83.9314
##  ARIMA(0,1,0)                               : 107.59
##  ARIMA(1,1,0)            with drift         : 104.8481
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : 84.54622
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : 83.40574
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : 87.1748
##  ARIMA(2,1,0)(0,0,1)[12] with drift         : 85.70768
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : 85.60747
##  ARIMA(2,1,1)(0,0,1)[12] with drift         : 88.05817
##  ARIMA(1,1,0)(0,0,1)[12]                    : 81.15865
##  ARIMA(1,1,0)                               : 102.6559
##  ARIMA(1,1,0)(1,0,1)[12]                    : 82.27176
##  ARIMA(1,1,0)(1,0,0)[12]                    : 81.50907
##  ARIMA(0,1,0)(0,0,1)[12]                    : 85.06239
##  ARIMA(2,1,0)(0,0,1)[12]                    : 83.3702
##  ARIMA(1,1,1)(0,0,1)[12]                    : 83.26533
##  ARIMA(0,1,1)(0,0,1)[12]                    : 81.69696
##  ARIMA(2,1,1)(0,0,1)[12]                    : 85.62181
## 
##  Best model: ARIMA(1,1,0)(0,0,1)[12]

modRes <- forecast(modeloRes, h=12, level = 0)

autoplot(modRes)

checkresiduals(modRes)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)(0,0,1)[12]
## Q* = 4.0879, df = 10, p-value = 0.9433
## 
## Model df: 2.   Total lags used: 12

rmseRes <- sqrt(mean((modRes$mean - testRes)^2))
maeRes <- mean(abs(modRes$mean - testRes))
mapeRes <- mean(abs((modRes$mean - testRes) / testRes)) * 100

cat("RMSE:", rmseRes, "\n")

## RMSE: 1.17241

cat("MAE:", maeRes, "\n")

## MAE: 1.082183

cat("MAPE:", mapeRes, "%\n")

## MAPE: 10.352 %

plot(modRes)
lines(testRes, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

#MODELO ROMANA
trainRom <- window(ROMANA, start = c(2018, 1), end = c(2022, 12))
testRom <- window(ROMANA, start = c(2023, 1))
plot.ts(trainRom)

modeloRom <- auto.arima(trainRom, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 139.0531
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 105.1986
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)                               : 136.9094
##  ARIMA(1,1,0)            with drift         : 126.8135
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 113.8598
##  ARIMA(2,1,0)(1,0,0)[12] with drift         : 107.5818
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 106.8396
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 105.942
##  ARIMA(2,1,1)(1,0,0)[12] with drift         : 109.8954
##  ARIMA(1,1,0)(1,0,0)[12]                    : 103.0515
##  ARIMA(1,1,0)                               : 124.5929
##  ARIMA(1,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(1,1,0)(0,0,1)[12]                    : Inf
##  ARIMA(0,1,0)(1,0,0)[12]                    : 112.1692
##  ARIMA(2,1,0)(1,0,0)[12]                    : 105.3413
##  ARIMA(1,1,1)(1,0,0)[12]                    : 104.4923
##  ARIMA(0,1,1)(1,0,0)[12]                    : 103.9012
##  ARIMA(2,1,1)(1,0,0)[12]                    : 107.5824
## 
##  Best model: ARIMA(1,1,0)(1,0,0)[12]

modRom <- forecast(modeloRom, h=12, level = 0)

autoplot(modRom)

checkresiduals(modRom)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)(1,0,0)[12]
## Q* = 15.051, df = 10, p-value = 0.1302
## 
## Model df: 2.   Total lags used: 12

rmseRom <- sqrt(mean((modRom$mean - testRom)^2))
maeRom <- mean(abs(modRom$mean - testRom))
mapeRom <- mean(abs((modRom$mean - testRom) / testRom)) * 100

cat("RMSE:", rmseRom, "\n")

## RMSE: 1.997691

cat("MAE:", maeRom, "\n")

## MAE: 1.907074

cat("MAPE:", mapeRom, "%\n")

## MAPE: 19.38354 %

plot(modRom)
lines(testRom, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

#MODELO SIEMBRA
trainSiem <- window(SIEMBRA, start = c(2018, 1), end = c(2022, 12))
testSiem <- window(SIEMBRA, start = c(2023, 1))
plot.ts(trainSiem)

modeloSiem <- auto.arima(trainSiem, trace = TRUE)

## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)            with drift         : 130.9177
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 117.8469
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)                               : 128.7858
##  ARIMA(1,1,0)            with drift         : 131.3985
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(1,0,0)[12] with drift         : 116.5544
##  ARIMA(0,1,0)(1,0,1)[12] with drift         : Inf
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : Inf
##  ARIMA(0,1,1)(1,0,0)[12] with drift         : 117.9567
##  ARIMA(1,1,1)(1,0,0)[12] with drift         : 119.5965
##  ARIMA(0,1,0)(1,0,0)[12]                    : 114.5842
##  ARIMA(0,1,0)(1,0,1)[12]                    : Inf
##  ARIMA(0,1,0)(0,0,1)[12]                    : Inf
##  ARIMA(1,1,0)(1,0,0)[12]                    : 115.7381
##  ARIMA(0,1,1)(1,0,0)[12]                    : 115.8559
##  ARIMA(1,1,1)(1,0,0)[12]                    : 117.3104
## 
##  Best model: ARIMA(0,1,0)(1,0,0)[12]

modSiem <- forecast(modeloSiem, h=12, level = 0)

autoplot(modSiem)

checkresiduals(modSiem)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)(1,0,0)[12]
## Q* = 9.4136, df = 11, p-value = 0.5838
## 
## Model df: 1.   Total lags used: 12

rmseSiem <- sqrt(mean((modSiem$mean - testSiem)^2))
maeSiem <- mean(abs(modSiem$mean - testSiem))
mapeSiem <- mean(abs((modSiem$mean - testSiem) / testSiem)) * 100

cat("RMSE:", rmseSiem, "\n")

## RMSE: 2.162868

cat("MAE:", maeSiem, "\n")

## MAE: 2.038901

cat("MAPE:", mapeSiem, "%\n")

## MAPE: 18.95748 %

plot(modSiem)
lines(testSiem, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

library(cowplot)

## Warning: package 'cowplot' was built under R version 4.3.3

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:lubridate':
## 
##     stamp

library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.3.3

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(forecast)
library(ggplot2)
library(cowplot)

#Ahora a guardar los graficos y luego combinarlos

grafico1 <- autoplot(modAlt, ylab = "Rentabilidad") + labs(title = "Modelo Arima Atlántico")
grafico2 <- autoplot(modRom, ylab = "Rentabilidad") + labs(title = "Modelo Arima Romana")
grafico3 <- autoplot(modCre, ylab = "Rentabilidad") + labs(title = "Modelo Arima Crecer")
grafico4 <- autoplot(modRes, ylab = "Rentabilidad") + labs(title = "Modelo Arima Reservas")
grafico5 <- autoplot(modJmmb, ylab = "Rentabilidad") + labs(title = "Modelo Arima Jmmb-BDI")
grafico6 <- autoplot(modPop, ylab = "Rentabilidad") + labs(title = "Modelo Arima Popular")
grafico7 <- autoplot(modSiem, ylab = "Rentabilidad") + labs(title = "Modelo Arima Siembra")

#Combinando los graficos con sus datos de pruebas y datos pronosticados

grafico_prueba <- autoplot(testAtl) + labs(title = "Prueba")
datos_prueba <- ggplot_build(grafico_prueba)$data[[1]]
grafico1 <- grafico1 + geom_line(data = datos_prueba, aes(x = x, y = y), color ="red")

grafico_prueba2 <- autoplot(testRom) + labs(title = "Prueba")
datos_prueba2 <- ggplot_build(grafico_prueba2)$data[[1]]
grafico2 <- grafico2 + geom_line(data = datos_prueba2, aes(x = x, y = y), color ="red")

grafico_prueba3 <- autoplot(testCre) + labs(title = "Prueba")
datos_prueba3 <- ggplot_build(grafico_prueba3)$data[[1]]
grafico3 <- grafico3 + geom_line(data = datos_prueba3, aes(x = x, y = y), color ="red")

grafico_prueba4 <- autoplot(testRes) + labs(title = "Prueba")
datos_prueba4 <- ggplot_build(grafico_prueba4)$data[[1]]
grafico4 <- grafico4 + geom_line(data = datos_prueba4, aes(x = x, y = y), color ="red")

grafico_prueba5 <- autoplot(testJmmb) + labs(title = "Prueba")
datos_prueba5 <- ggplot_build(grafico_prueba5)$data[[1]]
grafico5 <- grafico5 + geom_line(data = datos_prueba5, aes(x = x, y = y), color ="red")

grafico_prueba6 <- autoplot(testPop) + labs(title = "Prueba")
datos_prueba6 <- ggplot_build(grafico_prueba6)$data[[1]]
grafico6 <- grafico6 + geom_line(data = datos_prueba6, aes(x = x, y = y), color ="red")

grafico_prueba7 <- autoplot(testSiem) + labs(title = "Prueba")
datos_prueba7 <- ggplot_build(grafico_prueba7)$data[[1]]
grafico7 <- grafico7 + geom_line(data = datos_prueba7, aes(x = x, y = y), color ="red")


# Combinar los gráficos en mosaicos separados con pronosticos vs pruebas

plot_grid(grafico1, grafico2,grafico3,grafico4,grafico5,grafico6, grafico7, nrow = 3)

Modelo Arima

Nestese

2024-05-03