library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
## Warning: package 'tseries' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(xts)
## Warning: package 'xts' was built under R version 4.3.3
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
#Primero cargaremos los datos a través del archivo de Excel y transformar la columna Mes en formato Date
library(readxl)
Base_de_Datos_18_23_Calificadora_de_Riesgo_y_Rentabilidad_ <- read_excel("F:/Master Big Data/TFM/Base de Datos 18-23 (Calificadora de Riesgo y Rentabilidad).xlsx")
Datos <- Base_de_Datos_18_23_Calificadora_de_Riesgo_y_Rentabilidad_
#Debido a que los datos en el Excel están en porcentaje, escalaremos los valores multiplicandolos por 100
datos_porcentajes <- Datos %>%
mutate_at(vars(-Mes, -AFP), function(x) as.numeric(x) * 100)
print(datos_porcentajes)
## # A tibble: 504 × 10
## Mes AFP Rentabilidad `C-1` `C-2` `C-3` AAA AA A
## <dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2023-12-01 00:00:00 ROMANA 7.31 18.7 0 0 80.8 0 0.529
## 2 2023-12-01 00:00:00 RESER… 8.94 4.33 0.0492 0 72.2 8.27 4.77
## 3 2023-12-01 00:00:00 POPUL… 9.58 2.97 0.685 0 71.5 8.45 4.53
## 4 2023-12-01 00:00:00 SIEMB… 8.32 2.90 0.545 0 76.2 9.74 4.58
## 5 2023-12-01 00:00:00 CRECER 7.81 1.90 0 0 70.0 18.3 2.76
## 6 2023-12-01 00:00:00 JMMB-… 9.94 1.29 5.29 0 71.6 1.60 16.3
## 7 2023-12-01 00:00:00 ATLÁN… 7.72 0 0.987 0.570 76.6 2.90 12.9
## 8 2023-11-01 00:00:00 ROMANA 7.35 19.3 0 0 80.2 0 0.528
## 9 2023-11-01 00:00:00 RESER… 9.05 3.12 0.0715 0 75.9 6.95 4.63
## 10 2023-11-01 00:00:00 SIEMB… 8.95 2.90 0.505 0 77.2 8.85 4.63
## # ℹ 494 more rows
## # ℹ 1 more variable: BBB <dbl>
#Para la trasformación de los datos a series temporales, debemos separar los fondos del data set principal de datos, unicamente trabajaremos con la Rentabilidad, por lo que separaremos de cada fondo el Mes y la Rentabilidad.
datos_atlantico <- subset(datos_porcentajes, AFP == "ATLÁNTICO", select = c(Mes, Rentabilidad))
datos_crecer <- subset(datos_porcentajes, AFP == "CRECER", select = c(Mes, Rentabilidad))
datos_Jmmb <- subset(datos_porcentajes, AFP == "JMMB- BDI", select = c(Mes, Rentabilidad))
datos_popular <- subset(datos_porcentajes, AFP == "POPULAR", select = c(Mes, Rentabilidad))
datos_reservas <- subset(datos_porcentajes, AFP == "RESERVAS", select = c(Mes, Rentabilidad))
datos_romana <- subset(datos_porcentajes, AFP == "ROMANA", select = c(Mes, Rentabilidad))
datos_siembra <- subset(datos_porcentajes, AFP == "SIEMBRA", select = c(Mes, Rentabilidad))
#Para crear las series temporales, se crearan columna por columna las series y luego se combinarán
# Obtener la fecha mínima y máxima de la columna 'Mes'
fecha_minima <- min(Datos$Mes)
fecha_maxima <- max(Datos$Mes)
# Crear las series temporales para cada columna
ATLANTICO <- ts(datos_atlantico$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
SIEMBRA <- ts(datos_siembra$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
CRECER <- ts(datos_crecer$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
ROMANA <- ts(datos_romana$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
RESERVAS <- ts(datos_reservas$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
POPULAR <- ts(datos_popular$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
JMMB_BDI <- ts(datos_Jmmb$Rentabilidad, start = c(year(fecha_minima), month(fecha_minima)), end = c(year(fecha_maxima), month(fecha_maxima)), frequency = 12)
#Combinando las series en un mismo Data set, para fines de graficar las series temporales
Datos_Serie_temporal <- cbind(ATLANTICO,CRECER,JMMB_BDI,POPULAR,RESERVAS,ROMANA,SIEMBRA)
plot(Datos_Serie_temporal)

#Ahora estadisticamente veremos si estas series son estacionarias o no estacionarias con la prueba Dickey-Fuller
apply_adf_tests <- function(series_list, series_names) {
if (length(series_list) != length(series_names)) {
stop("La longitud de series_list y series_names debe ser la misma")
}
results <- list()
for (i in 1:length(series_list)) {
series <- series_list[[i]]
name <- series_names[i]
test_result <- adf.test(series)
results[[name]] <- test_result
cat("Prueba Dickey-Fuller para", name, "\n")
print(test_result)
if (test_result$p.value < 0.05) {
cat(name, "es estacionaria (se rechaza la hipótesis nula)\n")
} else {
cat(name, "es no estacionaria (no se rechaza la hipótesis nula)\n")
}
cat("\n")
}
return(results)
}
series_list <- list(ATLANTICO,CRECER,SIEMBRA,ROMANA,RESERVAS,POPULAR,JMMB_BDI)
series_names <- c("ATLANTICO", "CRECER","SIEMBRA","ROMANA","RESERVAS","POPULAR","JMMB_BDI")
Prueba_Dickey_Fuller <- apply_adf_tests(series_list, series_names)
## Prueba Dickey-Fuller para ATLANTICO
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -2.1987, Lag order = 4, p-value = 0.4944
## alternative hypothesis: stationary
##
## ATLANTICO es no estacionaria (no se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para CRECER
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -1.8888, Lag order = 4, p-value = 0.6206
## alternative hypothesis: stationary
##
## CRECER es no estacionaria (no se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para SIEMBRA
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -2.642, Lag order = 4, p-value = 0.3139
## alternative hypothesis: stationary
##
## SIEMBRA es no estacionaria (no se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para ROMANA
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -3.549, Lag order = 4, p-value = 0.044
## alternative hypothesis: stationary
##
## ROMANA es estacionaria (se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para RESERVAS
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -2.7278, Lag order = 4, p-value = 0.279
## alternative hypothesis: stationary
##
## RESERVAS es no estacionaria (no se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para POPULAR
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -2.5428, Lag order = 4, p-value = 0.3543
## alternative hypothesis: stationary
##
## POPULAR es no estacionaria (no se rechaza la hipótesis nula)
##
## Prueba Dickey-Fuller para JMMB_BDI
##
## Augmented Dickey-Fuller Test
##
## data: series
## Dickey-Fuller = -2.5589, Lag order = 4, p-value = 0.3478
## alternative hypothesis: stationary
##
## JMMB_BDI es no estacionaria (no se rechaza la hipótesis nula)
#Vamos a empezar con todos los fondos no estacionarios a modo experimental, vimos en la prueba Dickey-Fuller que el unico fondo que es estacionario es ROMANA, por lo tanto deberemos diferenciar los otros fondos para que el P-valor esté por debajo de su nivel de significancia 0.05. Primero veremos cuales son las diferenciaciones necesarias para llevar el P-valor por debajo de 0.05. Usaremos la función "diff" dentro de la función "adf.test" para confirmar cuanta diferenciación necesitamos para estas series.
adf.test(diff(ATLANTICO,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(ATLANTICO, differences = 1)
## Dickey-Fuller = -3.0241, Lag order = 4, p-value = 0.1586
## alternative hypothesis: stationary
adf.test(diff(JMMB_BDI,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(JMMB_BDI, differences = 1)
## Dickey-Fuller = -2.4993, Lag order = 4, p-value = 0.3721
## alternative hypothesis: stationary
adf.test(diff(SIEMBRA,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(SIEMBRA, differences = 1)
## Dickey-Fuller = -2.6051, Lag order = 4, p-value = 0.329
## alternative hypothesis: stationary
adf.test(diff(POPULAR,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(POPULAR, differences = 1)
## Dickey-Fuller = -2.6336, Lag order = 4, p-value = 0.3174
## alternative hypothesis: stationary
adf.test(diff(RESERVAS,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(RESERVAS, differences = 1)
## Dickey-Fuller = -3.2155, Lag order = 4, p-value = 0.09238
## alternative hypothesis: stationary
adf.test(diff(CRECER,differences = 1))
##
## Augmented Dickey-Fuller Test
##
## data: diff(CRECER, differences = 1)
## Dickey-Fuller = -3.0842, Lag order = 4, p-value = 0.1341
## alternative hypothesis: stationary
#Con una unica diferenciación no logramos una serie estacionaria, por lo que haremos una segunda.
adf.test(diff(ATLANTICO,differences = 2))
## Warning in adf.test(diff(ATLANTICO, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ATLANTICO, differences = 2)
## Dickey-Fuller = -5.26, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff(JMMB_BDI,differences = 2))
## Warning in adf.test(diff(JMMB_BDI, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(JMMB_BDI, differences = 2)
## Dickey-Fuller = -4.6012, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff(SIEMBRA,differences = 2))
## Warning in adf.test(diff(SIEMBRA, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(SIEMBRA, differences = 2)
## Dickey-Fuller = -5.5321, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff(POPULAR,differences = 2))
## Warning in adf.test(diff(POPULAR, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(POPULAR, differences = 2)
## Dickey-Fuller = -5.5666, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff(RESERVAS,differences = 2))
## Warning in adf.test(diff(RESERVAS, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(RESERVAS, differences = 2)
## Dickey-Fuller = -5.0794, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
adf.test(diff(CRECER,differences = 2))
## Warning in adf.test(diff(CRECER, differences = 2)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(CRECER, differences = 2)
## Dickey-Fuller = -5.8095, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#Vemos que con una segunda difenciación si obtenemos una serie estacionaria, esto nos servirá de referencia para el modelo ARIMA que entrenaremos más adelante. Ahora vamos a crear el primer modelo con el fondo ATLANTICO, primero vamos a dividir los datos de entrenamiento y prueba.
trainAtl <- window(ATLANTICO, start = c(2018, 1), end = c(2022, 12))
testAtl <- window(ATLANTICO, start = c(2023, 1))
plot.ts(trainAtl)

#Como vimos anteriormente sobre las diferenciación, todos los fondos a diferencia de ROMANA son estacionarios con dos diferenciaciones, por lo que usaremos la función "auto.arima" para que nos asigne el mejor modelo con dos diferenciaciones, ademas habilitaremos el "trace" para ver cuales otros modelos evaluó y descartó. Pero primero veremso que otro modelo nos puede asignar directamente.
modeloAlt <- auto.arima(trainAtl, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 122.5438
## ARIMA(1,1,0)(1,0,0)[12] with drift : 98.68132
## ARIMA(0,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0) : 120.4348
## ARIMA(1,1,0) with drift : 117.8188
## ARIMA(1,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 100.8998
## ARIMA(2,1,0)(1,0,0)[12] with drift : 98.1436
## ARIMA(2,1,0) with drift : 117.7558
## ARIMA(2,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(2,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(3,1,0)(1,0,0)[12] with drift : 99.63696
## ARIMA(2,1,1)(1,0,0)[12] with drift : 99.98992
## ARIMA(1,1,1)(1,0,0)[12] with drift : 97.69789
## ARIMA(1,1,1) with drift : 118.1513
## ARIMA(1,1,1)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,1)(1,0,0)[12] with drift : 100.0488
## ARIMA(1,1,2)(1,0,0)[12] with drift : 99.92283
## ARIMA(0,1,2)(1,0,0)[12] with drift : 99.78638
## ARIMA(2,1,2)(1,0,0)[12] with drift : Inf
## ARIMA(1,1,1)(1,0,0)[12] : 95.30658
## ARIMA(1,1,1) : 115.8629
## ARIMA(1,1,1)(1,0,1)[12] : Inf
## ARIMA(1,1,1)(0,0,1)[12] : Inf
## ARIMA(0,1,1)(1,0,0)[12] : 97.77189
## ARIMA(1,1,0)(1,0,0)[12] : 96.39298
## ARIMA(2,1,1)(1,0,0)[12] : 97.50663
## ARIMA(1,1,2)(1,0,0)[12] : 97.44044
## ARIMA(0,1,0)(1,0,0)[12] : 98.72183
## ARIMA(0,1,2)(1,0,0)[12] : 97.40394
## ARIMA(2,1,0)(1,0,0)[12] : 95.75386
## ARIMA(2,1,2)(1,0,0)[12] : Inf
##
## Best model: ARIMA(1,1,1)(1,0,0)[12]
modAlt <- forecast(modeloAlt, h=12, level = 0)
checkresiduals(modeloAlt)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(1,0,0)[12]
## Q* = 13.514, df = 9, p-value = 0.1407
##
## Model df: 3. Total lags used: 12
autoplot(modAlt)

plot(modAlt)
lines(testAtl, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

rmseAtl <- sqrt(mean((modAlt$mean - testAtl)^2))
maeAtl <- mean(abs(modAlt$mean - testAtl))
mapeAtl <- mean(abs((modAlt$mean - testAtl) / testAtl)) * 100
cat("RMSE:", rmseAtl, "\n")
## RMSE: 3.475409
cat("MAE:", maeAtl, "\n")
## MAE: 3.236391
cat("MAPE:", mapeAtl, "%\n")
## MAPE: 30.14065 %
#El primer modelo generado automanticamente por auto.arima fue sin diferenciación y nos dió un MAPE del 30.14%, un RMSE de 3.47 y un MAE de 3.23, valores muy alto, vamos a probar con las dos difereciaciones
modeloAlt2 <- auto.arima(trainAtl, d = 2, trace = TRUE)
##
## ARIMA(2,2,2)(1,0,1)[12] : Inf
## ARIMA(0,2,0) : 135.8397
## ARIMA(1,2,0)(1,0,0)[12] : 104.8751
## ARIMA(0,2,1)(0,0,1)[12] : Inf
## ARIMA(1,2,0) : 123.8771
## ARIMA(1,2,0)(1,0,1)[12] : Inf
## ARIMA(1,2,0)(0,0,1)[12] : Inf
## ARIMA(0,2,0)(1,0,0)[12] : 119.5128
## ARIMA(2,2,0)(1,0,0)[12] : 102.1742
## ARIMA(2,2,0) : 122.8175
## ARIMA(2,2,0)(1,0,1)[12] : Inf
## ARIMA(2,2,0)(0,0,1)[12] : Inf
## ARIMA(3,2,0)(1,0,0)[12] : 104.421
## ARIMA(2,2,1)(1,0,0)[12] : 100.3938
## ARIMA(2,2,1) : Inf
## ARIMA(2,2,1)(1,0,1)[12] : Inf
## ARIMA(2,2,1)(0,0,1)[12] : Inf
## ARIMA(1,2,1)(1,0,0)[12] : 98.56995
## ARIMA(1,2,1) : 120.1677
## ARIMA(1,2,1)(1,0,1)[12] : Inf
## ARIMA(1,2,1)(0,0,1)[12] : Inf
## ARIMA(0,2,1)(1,0,0)[12] : 96.27943
## ARIMA(0,2,1) : 118.8741
## ARIMA(0,2,1)(1,0,1)[12] : Inf
## ARIMA(0,2,2)(1,0,0)[12] : 98.57438
## ARIMA(1,2,2)(1,0,0)[12] : Inf
##
## Best model: ARIMA(0,2,1)(1,0,0)[12]
modAlt2 <- forecast(modeloAlt2, h=12, level = 95)
autoplot(modAlt2)

checkresiduals(modAlt2)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,1)(1,0,0)[12]
## Q* = 14.687, df = 10, p-value = 0.1439
##
## Model df: 2. Total lags used: 12
rmseAlt2 <- sqrt(mean((modAlt2$mean - testAtl)^2))
maeAlt2 <- mean(abs(modAlt2$mean - testAtl))
mapeAlt2 <- mean(abs((modAlt2$mean - testAtl) / testAtl)) * 100
cat("RMSE:", rmseAlt2, "\n")
## RMSE: 4.757432
cat("MAE:", maeAlt2, "\n")
## MAE: 4.362603
cat("MAPE:", mapeAlt2, "%\n")
## MAPE: 40.28874 %
#Vemos que el primer modelo con auto.arima sin ajustar diretamente las diferenciaciones dió mejor resultado que con las diferenciaciones ajustadas, por lo que nos quedaremos para el resto de los fondos con un modelo ajustado automaticamente con auto.arima.
plot(modAlt)
lines(testAtl, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO CRECER
trainCre <- window(CRECER, start = c(2018, 1), end = c(2022, 12))
testCre <- window(CRECER, start = c(2023, 1))
plot.ts(trainCre)

modeloCre <- auto.arima(trainCre, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 130.128
## ARIMA(1,1,0)(1,0,0)[12] with drift : 106.2605
## ARIMA(0,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0) : 127.9841
## ARIMA(1,1,0) with drift : 129.1991
## ARIMA(1,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 106.0099
## ARIMA(0,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,1)(1,0,0)[12] with drift : 106.9135
## ARIMA(1,1,1)(1,0,0)[12] with drift : 106.3186
## ARIMA(0,1,0)(1,0,0)[12] : 104.4092
## ARIMA(0,1,0)(1,0,1)[12] : Inf
## ARIMA(0,1,0)(0,0,1)[12] : Inf
## ARIMA(1,1,0)(1,0,0)[12] : 104.3701
## ARIMA(1,1,0) : 126.9788
## ARIMA(1,1,0)(1,0,1)[12] : Inf
## ARIMA(1,1,0)(0,0,1)[12] : Inf
## ARIMA(2,1,0)(1,0,0)[12] : 103.3904
## ARIMA(2,1,0) : 126.2507
## ARIMA(2,1,0)(1,0,1)[12] : Inf
## ARIMA(2,1,0)(0,0,1)[12] : Inf
## ARIMA(3,1,0)(1,0,0)[12] : 105.6114
## ARIMA(2,1,1)(1,0,0)[12] : 105.6722
## ARIMA(1,1,1)(1,0,0)[12] : 104.1323
## ARIMA(3,1,1)(1,0,0)[12] : 108.1076
## ARIMA(2,1,0)(1,0,0)[12] with drift : 105.5003
##
## Best model: ARIMA(2,1,0)(1,0,0)[12]
modCre <- forecast(modeloCre, h=12, level = 0)
autoplot(modCre)

checkresiduals(modCre)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)(1,0,0)[12]
## Q* = 10.435, df = 9, p-value = 0.3164
##
## Model df: 3. Total lags used: 12
rmseCre <- sqrt(mean((modCre$mean - testCre)^2))
maeCre <- mean(abs(modCre$mean - testCre))
mapeCre <- mean(abs((modCre$mean - testCre) / testCre)) * 100
cat("RMSE:", rmseCre, "\n")
## RMSE: 1.091973
cat("MAE:", maeCre, "\n")
## MAE: 1.01794
cat("MAPE:", mapeCre, "%\n")
## MAPE: 10.38951 %
plot(modCre)
lines(testCre, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO JMMB_BDI
trainJmmb <- window(JMMB_BDI, start = c(2018, 1), end = c(2022, 12))
testJmmb <- window(JMMB_BDI, start = c(2023, 1))
plot.ts(trainJmmb)

modeloJmmb <- auto.arima(trainJmmb, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 72.32021
## ARIMA(1,1,0)(1,0,0)[12] with drift : 39.87217
## ARIMA(0,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0) : 70.22339
## ARIMA(1,1,0) with drift : 63.31022
## ARIMA(1,1,0)(1,0,1)[12] with drift : 41.10013
## ARIMA(1,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 42.46818
## ARIMA(2,1,0)(1,0,0)[12] with drift : 39.9156
## ARIMA(1,1,1)(1,0,0)[12] with drift : 38.40684
## ARIMA(1,1,1) with drift : 63.9268
## ARIMA(1,1,1)(1,0,1)[12] with drift : 39.83035
## ARIMA(1,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,1)(1,0,0)[12] with drift : 41.17477
## ARIMA(2,1,1)(1,0,0)[12] with drift : 40.60852
## ARIMA(1,1,2)(1,0,0)[12] with drift : 40.51885
## ARIMA(0,1,2)(1,0,0)[12] with drift : 42.12224
## ARIMA(2,1,2)(1,0,0)[12] with drift : Inf
## ARIMA(1,1,1)(1,0,0)[12] : 36.08409
## ARIMA(1,1,1) : 61.63371
## ARIMA(1,1,1)(1,0,1)[12] : 37.42773
## ARIMA(1,1,1)(0,0,1)[12] : Inf
## ARIMA(0,1,1)(1,0,0)[12] : 39.28725
## ARIMA(1,1,0)(1,0,0)[12] : 37.87979
## ARIMA(2,1,1)(1,0,0)[12] : 38.19622
## ARIMA(1,1,2)(1,0,0)[12] : 38.11168
## ARIMA(0,1,0)(1,0,0)[12] : 40.84158
## ARIMA(0,1,2)(1,0,0)[12] : 40.05364
## ARIMA(2,1,0)(1,0,0)[12] : 37.7051
## ARIMA(2,1,2)(1,0,0)[12] : Inf
##
## Best model: ARIMA(1,1,1)(1,0,0)[12]
modJmmb <- forecast(modeloJmmb, h=12, level = 0)
autoplot(modJmmb)

checkresiduals(modJmmb)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(1,0,0)[12]
## Q* = 4.3773, df = 9, p-value = 0.8849
##
## Model df: 3. Total lags used: 12
rmseJmmb <- sqrt(mean((modJmmb$mean - testJmmb)^2))
maeJmmb <- mean(abs(modJmmb$mean - testJmmb))
mapeJmmb <- mean(abs((modJmmb$mean - testJmmb) / testJmmb)) * 100
cat("RMSE:", rmseJmmb, "\n")
## RMSE: 0.7750032
cat("MAE:", maeJmmb, "\n")
## MAE: 0.6932141
cat("MAPE:", mapeJmmb, "%\n")
## MAPE: 7.421734 %
plot(modJmmb)
lines(testJmmb, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO POPULAR
trainPop <- window(POPULAR, start = c(2018, 1), end = c(2022, 12))
testPop <- window(POPULAR, start = c(2023, 1))
plot.ts(trainPop)

modeloPop <- auto.arima(trainPop, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 118.983
## ARIMA(1,1,0)(1,0,0)[12] with drift : 104.6404
## ARIMA(0,1,1)(0,0,1)[12] with drift : 100.8319
## ARIMA(0,1,0) : 116.9803
## ARIMA(0,1,1) with drift : 119.4885
## ARIMA(0,1,1)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,1)(1,0,0)[12] with drift : 104.8956
## ARIMA(0,1,0)(0,0,1)[12] with drift : 99.81431
## ARIMA(0,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 103.3723
## ARIMA(1,1,0)(0,0,1)[12] with drift : 100.4709
## ARIMA(1,1,1)(0,0,1)[12] with drift : 100.7629
## ARIMA(0,1,0)(0,0,1)[12] : 97.75238
## ARIMA(0,1,0)(1,0,1)[12] : Inf
## ARIMA(0,1,0)(1,0,0)[12] : 101.2267
## ARIMA(1,1,0)(0,0,1)[12] : 98.24831
## ARIMA(0,1,1)(0,0,1)[12] : 98.63106
## ARIMA(1,1,1)(0,0,1)[12] : 98.37231
##
## Best model: ARIMA(0,1,0)(0,0,1)[12]
modPop <- forecast(modeloPop, h=12, level = 0)
autoplot(modPop)

checkresiduals(modPop)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)(0,0,1)[12]
## Q* = 8.3003, df = 11, p-value = 0.6862
##
## Model df: 1. Total lags used: 12
rmsePop <- sqrt(mean((modPop$mean - testPop)^2))
maePop <- mean(abs(modPop$mean - testPop))
mapePop <- mean(abs((modPop$mean - testPop) / testPop)) * 100
cat("RMSE:", rmsePop, "\n")
## RMSE: 1.247742
cat("MAE:", maePop, "\n")
## MAE: 1.178774
cat("MAPE:", mapePop, "%\n")
## MAPE: 11.76364 %
plot(modPop, main = "Modelo Arima Popular")
lines(testPop, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 3)

#MODELO RESERVAS
trainRes <- window(RESERVAS, start = c(2018, 1), end = c(2022, 12))
testRes <- window(RESERVAS, start = c(2023, 1))
plot.ts(trainRes)

modeloRes <- auto.arima(trainRes, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : 91.33855
## ARIMA(0,1,0) with drift : 109.6497
## ARIMA(1,1,0)(1,0,0)[12] with drift : 83.71072
## ARIMA(0,1,1)(0,0,1)[12] with drift : 83.9314
## ARIMA(0,1,0) : 107.59
## ARIMA(1,1,0) with drift : 104.8481
## ARIMA(1,1,0)(1,0,1)[12] with drift : 84.54622
## ARIMA(1,1,0)(0,0,1)[12] with drift : 83.40574
## ARIMA(0,1,0)(0,0,1)[12] with drift : 87.1748
## ARIMA(2,1,0)(0,0,1)[12] with drift : 85.70768
## ARIMA(1,1,1)(0,0,1)[12] with drift : 85.60747
## ARIMA(2,1,1)(0,0,1)[12] with drift : 88.05817
## ARIMA(1,1,0)(0,0,1)[12] : 81.15865
## ARIMA(1,1,0) : 102.6559
## ARIMA(1,1,0)(1,0,1)[12] : 82.27176
## ARIMA(1,1,0)(1,0,0)[12] : 81.50907
## ARIMA(0,1,0)(0,0,1)[12] : 85.06239
## ARIMA(2,1,0)(0,0,1)[12] : 83.3702
## ARIMA(1,1,1)(0,0,1)[12] : 83.26533
## ARIMA(0,1,1)(0,0,1)[12] : 81.69696
## ARIMA(2,1,1)(0,0,1)[12] : 85.62181
##
## Best model: ARIMA(1,1,0)(0,0,1)[12]
modRes <- forecast(modeloRes, h=12, level = 0)
autoplot(modRes)

checkresiduals(modRes)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(0,0,1)[12]
## Q* = 4.0879, df = 10, p-value = 0.9433
##
## Model df: 2. Total lags used: 12
rmseRes <- sqrt(mean((modRes$mean - testRes)^2))
maeRes <- mean(abs(modRes$mean - testRes))
mapeRes <- mean(abs((modRes$mean - testRes) / testRes)) * 100
cat("RMSE:", rmseRes, "\n")
## RMSE: 1.17241
cat("MAE:", maeRes, "\n")
## MAE: 1.082183
cat("MAPE:", mapeRes, "%\n")
## MAPE: 10.352 %
plot(modRes)
lines(testRes, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

#MODELO ROMANA
trainRom <- window(ROMANA, start = c(2018, 1), end = c(2022, 12))
testRom <- window(ROMANA, start = c(2023, 1))
plot.ts(trainRom)

modeloRom <- auto.arima(trainRom, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 139.0531
## ARIMA(1,1,0)(1,0,0)[12] with drift : 105.1986
## ARIMA(0,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0) : 136.9094
## ARIMA(1,1,0) with drift : 126.8135
## ARIMA(1,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 113.8598
## ARIMA(2,1,0)(1,0,0)[12] with drift : 107.5818
## ARIMA(1,1,1)(1,0,0)[12] with drift : 106.8396
## ARIMA(0,1,1)(1,0,0)[12] with drift : 105.942
## ARIMA(2,1,1)(1,0,0)[12] with drift : 109.8954
## ARIMA(1,1,0)(1,0,0)[12] : 103.0515
## ARIMA(1,1,0) : 124.5929
## ARIMA(1,1,0)(1,0,1)[12] : Inf
## ARIMA(1,1,0)(0,0,1)[12] : Inf
## ARIMA(0,1,0)(1,0,0)[12] : 112.1692
## ARIMA(2,1,0)(1,0,0)[12] : 105.3413
## ARIMA(1,1,1)(1,0,0)[12] : 104.4923
## ARIMA(0,1,1)(1,0,0)[12] : 103.9012
## ARIMA(2,1,1)(1,0,0)[12] : 107.5824
##
## Best model: ARIMA(1,1,0)(1,0,0)[12]
modRom <- forecast(modeloRom, h=12, level = 0)
autoplot(modRom)

checkresiduals(modRom)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(1,0,0)[12]
## Q* = 15.051, df = 10, p-value = 0.1302
##
## Model df: 2. Total lags used: 12
rmseRom <- sqrt(mean((modRom$mean - testRom)^2))
maeRom <- mean(abs(modRom$mean - testRom))
mapeRom <- mean(abs((modRom$mean - testRom) / testRom)) * 100
cat("RMSE:", rmseRom, "\n")
## RMSE: 1.997691
cat("MAE:", maeRom, "\n")
## MAE: 1.907074
cat("MAPE:", mapeRom, "%\n")
## MAPE: 19.38354 %
plot(modRom)
lines(testRom, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

#MODELO SIEMBRA
trainSiem <- window(SIEMBRA, start = c(2018, 1), end = c(2022, 12))
testSiem <- window(SIEMBRA, start = c(2023, 1))
plot.ts(trainSiem)

modeloSiem <- auto.arima(trainSiem, trace = TRUE)
##
## ARIMA(2,1,2)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0) with drift : 130.9177
## ARIMA(1,1,0)(1,0,0)[12] with drift : 117.8469
## ARIMA(0,1,1)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0) : 128.7858
## ARIMA(1,1,0) with drift : 131.3985
## ARIMA(1,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(1,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(1,0,0)[12] with drift : 116.5544
## ARIMA(0,1,0)(1,0,1)[12] with drift : Inf
## ARIMA(0,1,0)(0,0,1)[12] with drift : Inf
## ARIMA(0,1,1)(1,0,0)[12] with drift : 117.9567
## ARIMA(1,1,1)(1,0,0)[12] with drift : 119.5965
## ARIMA(0,1,0)(1,0,0)[12] : 114.5842
## ARIMA(0,1,0)(1,0,1)[12] : Inf
## ARIMA(0,1,0)(0,0,1)[12] : Inf
## ARIMA(1,1,0)(1,0,0)[12] : 115.7381
## ARIMA(0,1,1)(1,0,0)[12] : 115.8559
## ARIMA(1,1,1)(1,0,0)[12] : 117.3104
##
## Best model: ARIMA(0,1,0)(1,0,0)[12]
modSiem <- forecast(modeloSiem, h=12, level = 0)
autoplot(modSiem)

checkresiduals(modSiem)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)(1,0,0)[12]
## Q* = 9.4136, df = 11, p-value = 0.5838
##
## Model df: 1. Total lags used: 12
rmseSiem <- sqrt(mean((modSiem$mean - testSiem)^2))
maeSiem <- mean(abs(modSiem$mean - testSiem))
mapeSiem <- mean(abs((modSiem$mean - testSiem) / testSiem)) * 100
cat("RMSE:", rmseSiem, "\n")
## RMSE: 2.162868
cat("MAE:", maeSiem, "\n")
## MAE: 2.038901
cat("MAPE:", mapeSiem, "%\n")
## MAPE: 18.95748 %
plot(modSiem)
lines(testSiem, col = "red")
legend("topright", legend = c("Datos de prueba","Predicciones"), col = c("red","blue"), lty = 1)

library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.3
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(forecast)
library(ggplot2)
library(cowplot)
#Ahora a guardar los graficos y luego combinarlos
grafico1 <- autoplot(modAlt, ylab = "Rentabilidad") + labs(title = "Modelo Arima Atlántico")
grafico2 <- autoplot(modRom, ylab = "Rentabilidad") + labs(title = "Modelo Arima Romana")
grafico3 <- autoplot(modCre, ylab = "Rentabilidad") + labs(title = "Modelo Arima Crecer")
grafico4 <- autoplot(modRes, ylab = "Rentabilidad") + labs(title = "Modelo Arima Reservas")
grafico5 <- autoplot(modJmmb, ylab = "Rentabilidad") + labs(title = "Modelo Arima Jmmb-BDI")
grafico6 <- autoplot(modPop, ylab = "Rentabilidad") + labs(title = "Modelo Arima Popular")
grafico7 <- autoplot(modSiem, ylab = "Rentabilidad") + labs(title = "Modelo Arima Siembra")
#Combinando los graficos con sus datos de pruebas y datos pronosticados
grafico_prueba <- autoplot(testAtl) + labs(title = "Prueba")
datos_prueba <- ggplot_build(grafico_prueba)$data[[1]]
grafico1 <- grafico1 + geom_line(data = datos_prueba, aes(x = x, y = y), color ="red")
grafico_prueba2 <- autoplot(testRom) + labs(title = "Prueba")
datos_prueba2 <- ggplot_build(grafico_prueba2)$data[[1]]
grafico2 <- grafico2 + geom_line(data = datos_prueba2, aes(x = x, y = y), color ="red")
grafico_prueba3 <- autoplot(testCre) + labs(title = "Prueba")
datos_prueba3 <- ggplot_build(grafico_prueba3)$data[[1]]
grafico3 <- grafico3 + geom_line(data = datos_prueba3, aes(x = x, y = y), color ="red")
grafico_prueba4 <- autoplot(testRes) + labs(title = "Prueba")
datos_prueba4 <- ggplot_build(grafico_prueba4)$data[[1]]
grafico4 <- grafico4 + geom_line(data = datos_prueba4, aes(x = x, y = y), color ="red")
grafico_prueba5 <- autoplot(testJmmb) + labs(title = "Prueba")
datos_prueba5 <- ggplot_build(grafico_prueba5)$data[[1]]
grafico5 <- grafico5 + geom_line(data = datos_prueba5, aes(x = x, y = y), color ="red")
grafico_prueba6 <- autoplot(testPop) + labs(title = "Prueba")
datos_prueba6 <- ggplot_build(grafico_prueba6)$data[[1]]
grafico6 <- grafico6 + geom_line(data = datos_prueba6, aes(x = x, y = y), color ="red")
grafico_prueba7 <- autoplot(testSiem) + labs(title = "Prueba")
datos_prueba7 <- ggplot_build(grafico_prueba7)$data[[1]]
grafico7 <- grafico7 + geom_line(data = datos_prueba7, aes(x = x, y = y), color ="red")
# Combinar los gráficos en mosaicos separados con pronosticos vs pruebas
plot_grid(grafico1, grafico2,grafico3,grafico4,grafico5,grafico6, grafico7, nrow = 3)
