library(readxl)
remesas <- read_excel("C:/Users/MINEDUCYT/Downloads/baseremesasnormal.xlsx",
col_types = c("text", "numeric"))
remesas
## # A tibble: 360 × 2
## Concepto `Ingresos mensuales de remesas familiares`
## <chr> <dbl>
## 1 1991-01 63.1
## 2 1991-02 58.4
## 3 1991-03 67.6
## 4 1991-04 77.8
## 5 1991-05 77.4
## 6 1991-06 67.8
## 7 1991-07 70
## 8 1991-08 53.5
## 9 1991-09 53.1
## 10 1991-10 64
## # ℹ 350 more rows
names(remesas)=c("fecha", "ingresos")
remesas
## # A tibble: 360 × 2
## fecha ingresos
## <chr> <dbl>
## 1 1991-01 63.1
## 2 1991-02 58.4
## 3 1991-03 67.6
## 4 1991-04 77.8
## 5 1991-05 77.4
## 6 1991-06 67.8
## 7 1991-07 70
## 8 1991-08 53.5
## 9 1991-09 53.1
## 10 1991-10 64
## # ℹ 350 more rows
remesas=remesas[,2]
remesas=log(remesas$ingresos)
remesas=ts(remesas,start = c(1991,01),frequency = 12)
tail(remesas)
## [1] 6.315611 6.326686 6.316153 6.355100 6.252713 6.479462
length(remesas)
## [1] 360
library(TSstudio)
# Graficar la serie temporal con TSstudio
ts_plot(remesas,
title = "Ingresos Mensuales de Remesas Familiares",
Ytitle = "Remesas (en millones)",
Xtitle = "Fecha",
line.mode = "lines+markers", # Añade tanto línea como puntos
width = 2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
ndiffs(remesas)
## [1] 1
nsdiffs(remesas)
## [1] 1
# Ajustar modelo ARIMA con estacionalidad
library(forecast)
sarima_model <- auto.arima(remesas, seasonal = TRUE,trace=TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,1,2)(1,1,1)[12] : -804.2684
## ARIMA(0,1,0)(0,1,0)[12] : -681.2231
## ARIMA(1,1,0)(1,1,0)[12] : -746.6346
## ARIMA(0,1,1)(0,1,1)[12] : -783.1359
## ARIMA(2,1,2)(0,1,1)[12] : -789.2607
## ARIMA(2,1,2)(1,1,0)[12] : -789.5509
## ARIMA(2,1,2)(2,1,1)[12] : -830.2439
## ARIMA(2,1,2)(2,1,0)[12] : -796.7153
## ARIMA(2,1,2)(2,1,2)[12] : -828.3854
## ARIMA(2,1,2)(1,1,2)[12] : -821.5742
## ARIMA(1,1,2)(2,1,1)[12] : -834.6936
## ARIMA(1,1,2)(1,1,1)[12] : -801.496
## ARIMA(1,1,2)(2,1,0)[12] : -798.7567
## ARIMA(1,1,2)(2,1,2)[12] : -832.7643
## ARIMA(1,1,2)(1,1,0)[12] : -784.4246
## ARIMA(1,1,2)(1,1,2)[12] : -820.9183
## ARIMA(0,1,2)(2,1,1)[12] : -821.8446
## ARIMA(1,1,1)(2,1,1)[12] : -836.546
## ARIMA(1,1,1)(1,1,1)[12] : -803.7068
## ARIMA(1,1,1)(2,1,0)[12] : -798.6473
## ARIMA(1,1,1)(2,1,2)[12] : -834.6171
## ARIMA(1,1,1)(1,1,0)[12] : -785.9254
## ARIMA(1,1,1)(1,1,2)[12] : -822.8537
## ARIMA(0,1,1)(2,1,1)[12] : -808.9675
## ARIMA(1,1,0)(2,1,1)[12] : -798.3875
## ARIMA(2,1,1)(2,1,1)[12] : -829.4463
## ARIMA(0,1,0)(2,1,1)[12] : -770.0999
## ARIMA(2,1,0)(2,1,1)[12] : -829.0295
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,1,1)(2,1,1)[12] : -890.566
##
## Best model: ARIMA(1,1,1)(2,1,1)[12]
# Resumen del modelo ajustado
summary(sarima_model)
## Series: remesas
## ARIMA(1,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.3955 -0.8331 0.3705 -0.1882 -0.8437
## s.e. 0.0816 0.0523 0.0753 0.0698 0.0560
##
## sigma^2 = 0.004243: log likelihood = 451.41
## AIC=-890.81 AICc=-890.57 BIC=-867.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0001771837 0.06349264 0.04322869 -0.001850719 0.8432198
## MASE ACF1
## Training set 0.4795681 -0.001630074
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
coef(sarima_model)
## ar1 ma1 sar1 sar2 sma1
## 0.3954834 -0.8330898 0.3704664 -0.1881861 -0.8437298
residuos=residuals(sarima_model)
coeftest(sarima_model)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.395483 0.081616 4.8456 1.262e-06 ***
## ma1 -0.833090 0.052339 -15.9171 < 2.2e-16 ***
## sar1 0.370466 0.075336 4.9175 8.764e-07 ***
## sar2 -0.188186 0.069790 -2.6965 0.007008 **
## sma1 -0.843730 0.055978 -15.0726 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnóstico de los residuos
checkresiduals(sarima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 26.253, df = 19, p-value = 0.1233
##
## Model df: 5. Total lags used: 24
# Gráfico Q-Q de los residuos
qqnorm(residuos, main = "Q-Q Plot de los Residuos")
qqline(residuos, col = "red")

# Prueba de normalidad de Shapiro-Wilk
shapiro.test(residuos)
##
## Shapiro-Wilk normality test
##
## data: residuos
## W = 0.91971, p-value = 5.924e-13
library(tseries)
# Prueba de Jarque-Bera
jarque.bera.test(residuos)
##
## Jarque Bera Test
##
## data: residuos
## X-squared = 953.71, df = 2, p-value < 2.2e-16
mean(sarima_model$residuals)
## [1] 0.0001771837
var(sarima_model$residuals)
## [1] 0.004042513
# Aplicar transformación Box-Cox
lambda <- BoxCox.lambda(remesas)
boxcox_arima <- auto.arima(remesas, lambda = lambda,trace = TRUE,seasonal = TRUE)
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,1,2)(1,1,1)[12] : -1083.815
## ARIMA(0,1,0)(0,1,0)[12] : -956.8686
## ARIMA(1,1,0)(1,1,0)[12] : -1026.6
## ARIMA(0,1,1)(0,1,1)[12] : -1061.327
## ARIMA(2,1,2)(0,1,1)[12] : -1066.82
## ARIMA(2,1,2)(1,1,0)[12] : -1070.597
## ARIMA(2,1,2)(2,1,1)[12] : -1112.887
## ARIMA(2,1,2)(2,1,0)[12] : -1078.693
## ARIMA(2,1,2)(2,1,2)[12] : -1111.158
## ARIMA(2,1,2)(1,1,2)[12] : -1101.737
## ARIMA(1,1,2)(2,1,1)[12] : -1116.699
## ARIMA(1,1,2)(1,1,1)[12] : -1080.784
## ARIMA(1,1,2)(2,1,0)[12] : -1080.787
## ARIMA(1,1,2)(2,1,2)[12] : -1114.867
## ARIMA(1,1,2)(1,1,0)[12] : -1065.309
## ARIMA(1,1,2)(1,1,2)[12] : -1100.775
## ARIMA(0,1,2)(2,1,1)[12] : -1102.205
## ARIMA(1,1,1)(2,1,1)[12] : -1118.583
## ARIMA(1,1,1)(1,1,1)[12] : -1082.867
## ARIMA(1,1,1)(2,1,0)[12] : -1080.324
## ARIMA(1,1,1)(2,1,2)[12] : -1116.745
## ARIMA(1,1,1)(1,1,0)[12] : -1066.831
## ARIMA(1,1,1)(1,1,2)[12] : -1102.618
## ARIMA(0,1,1)(2,1,1)[12] : -1090.117
## ARIMA(1,1,0)(2,1,1)[12] : -1079.485
## ARIMA(2,1,1)(2,1,1)[12] : -1112.786
## ARIMA(0,1,0)(2,1,1)[12] : -1049.57
## ARIMA(2,1,0)(2,1,1)[12] : -1112.468
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(1,1,1)(2,1,1)[12] : -1180.514
##
## Best model: ARIMA(1,1,1)(2,1,1)[12]
boxcox_arima
## Series: remesas
## ARIMA(1,1,1)(2,1,1)[12]
## Box Cox transformation: lambda= 0.7435997
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.3854 -0.8315 0.3785 -0.1868 -0.8537
## s.e. 0.0820 0.0526 0.0735 0.0691 0.0539
##
## sigma^2 = 0.001837: log likelihood = 596.38
## AIC=-1180.76 AICc=-1180.51 BIC=-1157.66
shapiro.test(residuals(boxcox_arima))
##
## Shapiro-Wilk normality test
##
## data: residuals(boxcox_arima)
## W = 0.92213, p-value = 1e-12
library(tsoutliers)
modelo=tsoutliers::tso(remesas)
modelo
## Series: remesas
## Regression with ARIMA(2,1,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 TC352 LS354
## -0.5957 -0.3807 0.6902 0.1667 -0.4292 0.2926
## s.e. 0.0492 0.0491 0.0541 0.0561 0.0536 0.0513
##
## sigma^2 = 0.00375: log likelihood = 488.75
## AIC=-963.5 AICc=-963.19 BIC=-936.32
##
## Outliers:
## type ind time coefhat tstat
## 1 TC 352 2020:04 -0.4292 -8.008
## 2 LS 354 2020:06 0.2926 5.700
escalon1= c(rep(0,251),rep(1,109))
escalon2= c(rep(0,253),rep(1,107))
esca=c(rep(0,251),rep(1,1),rep(0,108))
exogenas=cbind(escalon1,escalon2)
serie2=ts(escalon1,start = c(1991,01), frequency = 12)
serie3=ts(esca,start = c(1991,01), frequency = 12)
serie4=ts(exogenas,start = c(1991,01), frequency = 12)
# Ajuste de un modelo ARIMAX con una variable exógena
arimax_model <- auto.arima(remesas, xreg = serie4)
arimax_model
## Series: remesas
## Regression with ARIMA(3,1,1)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 ar3 ma1 sar1 sar2 escalon1 escalon2
## 0.4195 0.0544 0.1978 -0.9417 0.7302 0.1069 0.0125 0.0448
## s.e. 0.0651 0.0604 0.0589 0.0339 0.0618 0.0622 0.0496 0.0496
##
## sigma^2 = 0.004753: log likelihood = 448.07
## AIC=-878.13 AICc=-877.62 BIC=-843.18
coeftest(arimax_model)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.419471 0.065072 6.4463 1.146e-10 ***
## ar2 0.054366 0.060373 0.9005 0.3678554
## ar3 0.197814 0.058873 3.3600 0.0007794 ***
## ma1 -0.941664 0.033862 -27.8089 < 2.2e-16 ***
## sar1 0.730242 0.061813 11.8137 < 2.2e-16 ***
## sar2 0.106856 0.062185 1.7183 0.0857338 .
## escalon1 0.012490 0.049563 0.2520 0.8010331
## escalon2 0.044761 0.049602 0.9024 0.3668434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
residuos2=residuals(arimax_model)
shapiro.test(residuos2)
##
## Shapiro-Wilk normality test
##
## data: residuos2
## W = 0.92058, p-value = 7.136e-13
checkresiduals(arimax_model)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(3,1,1)(2,0,0)[12] errors
## Q* = 31.819, df = 18, p-value = 0.0231
##
## Model df: 6. Total lags used: 24
# Instalar y cargar la librería para modelos NNAR
#install.packages("forecast")
library(forecast)
# Ajustar un modelo NNAR
nnar_model <- nnetar(remesas)
# Hacer predicciones con el modelo NNAR
nnar_forecast <- forecast(nnar_model, h = 12)
nnar_forecast
## Jan Feb Mar Apr May Jun Jul Aug
## 2021 6.200112 6.208958 6.161031 5.858566 6.044261 6.211939 6.324211 6.368791
## Sep Oct Nov Dec
## 2021 6.380136 6.413659 6.349457 6.496254
summary(nnar_forecast)
##
## Forecast method: NNAR(2,1,2)[12]
##
## Model Information:
##
## Average of 20 networks, each of which is
## a 3-2-1 network with 11 weights
## options were - linear output units
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.016107e-05 0.07606451 0.0555053 -0.0218143 1.069633 0.6157617
## ACF1
## Training set 0.2576641
##
## Forecasts:
## Jan Feb Mar Apr May Jun Jul Aug
## 2021 6.200112 6.208958 6.161031 5.858566 6.044261 6.211939 6.324211 6.368791
## Sep Oct Nov Dec
## 2021 6.380136 6.413659 6.349457 6.496254
plot(nnar_forecast)

checkresiduals(nnar_model)

##
## Ljung-Box test
##
## data: Residuals from NNAR(2,1,2)[12]
## Q* = 116.36, df = 24, p-value = 4.308e-14
##
## Model df: 0. Total lags used: 24
shapiro.test(residuals(nnar_model))
##
## Shapiro-Wilk normality test
##
## data: residuals(nnar_model)
## W = 0.94695, p-value = 7.493e-10
shapiro.test(nnar_model$residuals)
##
## Shapiro-Wilk normality test
##
## data: nnar_model$residuals
## W = 0.94695, p-value = 7.493e-10
# Verificar la autocorrelación de los residuos
residuals_nn <- residuals(nnar_model)
acf(na.omit(residuals_nn))

# Transformación logarítmica en los datos
log_data <- log(remesas)
nnar_model_log <- nnetar(log_data)
pred=forecast(nnar_model_log,h=12)
exp(pred$mean)
## Jan Feb Mar Apr May Jun Jul Aug
## 2021 6.187901 6.205092 6.155573 5.859705 6.048335 6.201643 6.330416 6.389658
## Sep Oct Nov Dec
## 2021 6.405789 6.445047 6.365577 6.547705
plot(pred)

nnar_model_log
## Series: log_data
## Model: NNAR(3,1,2)[12]
## Call: nnetar(y = log_data)
##
## Average of 20 networks, each of which is
## a 4-2-1 network with 13 weights
## options were - linear output units
##
## sigma^2 estimated as 0.0002101
shapiro.test(nnar_model_log$residuals)
##
## Shapiro-Wilk normality test
##
## data: nnar_model_log$residuals
## W = 0.96339, p-value = 1.185e-07
# Ver los residuos
residuals_nn_log <- residuals(nnar_model_log)
# Ajustar el modelo NNAR con diferentes configuraciones
nnar_model <- nnetar(remesas, p = 5, P = 1, size = 10) # Probar con más lags (p) o más neuronas (size)
# Verificar los residuos
residuals_nn <- residuals(nnar_model)
shapiro.test(residuals_nn)
##
## Shapiro-Wilk normality test
##
## data: residuals_nn
## W = 0.97675, p-value = 2.116e-05