library(tidyverse)
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.0.8     v readr     2.1.4
## v forcats   1.0.0     v stringr   1.5.1
## v ggplot2   3.4.2     v tibble    3.1.6
## v lubridate 1.9.2     v tidyr     1.2.0
## v purrr     0.3.4     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the [conflicted package](http://conflicted.r-lib.org/) to force all conflicts to become errors
library(forecast)
## Warning: package 'forecast' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

CARGA DE DATOS

df=readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/Indice de volumen de actividad economica/Índice_de_Volumen_de_la_Actividad_Económica_(IVAE)._Serie_desestacionalizada.xlsx")

desempleo_us= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/remesas/UNRATE.xls")
ipc_us= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/remesas/CPIAUCNS.xls")
remesas= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/pronosticos comercio exterior/Ingresos_mensuales_de_remesas_familiares - forecasted.xlsx")
impo_expo= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/pronosticos comercio exterior/Importaciones y exportaciones BCR.xlsx")
PIB_prod= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/PIB/PIB.xlsx")

IPC= readxl::read_excel("C:/Users/ronald.hernandez/Documents/pronosticos/IPC/Índice_de_Precios_al_Consumidor_(IPC).xlsx")


IPC_filtrado= IPC %>% filter(IPC$fecha>="2004-12-01")

impo_expo_filtrado=impo_expo %>% filter(fecha>="2004-12-01")

desempleo_us_filtrado=desempleo_us %>% filter(observation_date>="2004-12-01")


ipc_us_filtrado=ipc_us %>% filter(observation_date>="2004-12-01")

remesas_filtrado=remesas %>% filter(fecha>="2004-12-01")

PIB_filtrado=PIB_prod %>% filter(fecha >="2004-12-01")


serie_ivae <- ts(df$IVAE,
                   frequency = 12, start = c(2005,1))

autoplot(serie_ivae)

ANÁLISIS DE ESTACIONALIDAD

df_serie=data.frame(Y=as.matrix(serie_ivae), fecha=zoo::as.Date(time(serie_ivae)))

# Crear el resumen mensual y ordenar los meses
resumen_mensual <- df_serie %>%
  mutate(mes = factor(month(fecha), labels = c("Ene", "Feb", "Mar", "Abr", "May", "Jun", "Jul", "Ago", "Sep", "Oct", "Nov", "Dic"), ordered = TRUE))

# Trazar el boxplot
ggplot(resumen_mensual, aes(x = mes, y = Y)) +
  geom_boxplot() +
  labs(x = "Mes", y = "IVAE", title = "Boxplot de IVAE El Salvador") 

autoplot(decompose(serie_ivae))

# DIVISIÓN DE DATOS

training <- window(serie_ivae, end =c(2022,5))
test <- window(serie_ivae, start =c(2022,6))

MODELO MEDIA

modelo_media=Arima(training,order=c(0,1,0), include.mean=T, include.drift =T)

pronosticos=forecast(modelo_media)

autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_media)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0) with drift
## Q* = 33.733, df = 23, p-value = 0.06911
## 
## Model df: 1.   Total lags used: 24
accuracy(pronosticos, test)
##                        ME     RMSE      MAE         MPE     MAPE      MASE
## Training set 0.0003820006 1.939076 1.229159 -0.01980926 1.288017 0.3412887
## Test set     1.1592467949 1.912226 1.458157  0.93768408 1.188892 0.4048724
##                   ACF1 Theil's U
## Training set 0.1126920        NA
## Test set     0.4552457  1.230682

MODELO ARIMA

modelo_arima1=auto.arima(training, stepwise = F, d=1, D=1)

pronosticos=forecast(modelo_arima1)

autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_arima1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(2,1,2)[12]
## Q* = 25.977, df = 19, p-value = 0.1308
## 
## Model df: 5.   Total lags used: 24
accuracy(pronosticos, test)
##                       ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.01485517 1.929518 1.260524 -0.04202314 1.307761 0.3499976
## Test set     -1.87899957 3.019338 2.677522 -1.57280893 2.221545 0.7434417
##                   ACF1 Theil's U
## Training set 0.0183799        NA
## Test set     0.6405982  1.946319

MODELO ARIMAX

reg=cbind("desempleo"=head(desempleo_us_filtrado$UNRATE, length(training)),
          "importaciones_totales"=head(impo_expo_filtrado$`importaciones totales`, length(training)),
          "exportaciones_totales"=head(impo_expo_filtrado$`exportaciones totales`, length(training)),
          "PIB_producción"=head(PIB_filtrado$`Producto Interno Bruto trimestral por el enfoque de la producción`, length(training)))


new_xreg= cbind(head(desempleo_us_filtrado %>% filter(observation_date>"2022-05-01"),24) %>% pull(UNRATE),
                head(impo_expo_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`importaciones totales`),
                head(impo_expo_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`exportaciones totales`),
                head(PIB_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`Producto Interno Bruto trimestral por el enfoque de la producción`))

modelo_arima2=auto.arima(training, xreg=reg, stepwise = F, d=1, D=1)

pronosticos=forecast(modelo_arima2, xreg=new_xreg)
## Warning in forecast.forecast_ARIMA(modelo_arima2, xreg = new_xreg): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_arima2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,1,4)(0,1,1)[12] errors
## Q* = 18.912, df = 15, p-value = 0.2177
## 
## Model df: 9.   Total lags used: 24
accuracy(pronosticos, test)
##                       ME     RMSE       MAE         MPE      MAPE      MASE
## Training set -0.04402747 1.244011 0.9134803 -0.05981333 0.9381635 0.2536372
## Test set      1.17760726 1.808699 1.5255622  0.95905522 1.2500179 0.4235881
##                     ACF1 Theil's U
## Training set 0.002435346        NA
## Test set     0.056695351  1.170672
lmtest::coeftest(modelo_arima2)
## 
## z test of coefficients:
## 
##                         Estimate Std. Error  z value  Pr(>|z|)    
## ma1                   -1.0615498  0.0899501 -11.8015 < 2.2e-16 ***
## ma2                    0.0566141  0.1052900   0.5377 0.5907864    
## ma3                   -0.0855418  0.1186179  -0.7212 0.4708145    
## ma4                    0.1654842  0.0968217   1.7092 0.0874206 .  
## sma1                  -0.8782694  0.0635840 -13.8127 < 2.2e-16 ***
## desempleo             -0.3280335  0.0978128  -3.3537 0.0007974 ***
## importaciones_totales -0.0025753  0.0011948  -2.1554 0.0311286 *  
## exportaciones_totales  0.0114538  0.0037989   3.0150 0.0025696 ** 
## PIB_producción         0.7680350  0.0719547  10.6739 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOLT WINTERS

modelo_hw=forecast::hw(training)

pronosticos=forecast(modelo_hw)

autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_hw)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' additive method
## Q* = 34.493, df = 8, p-value = 3.306e-05
## 
## Model df: 16.   Total lags used: 24
accuracy(pronosticos, test)
##                       ME     RMSE      MAE          MPE     MAPE      MASE
## Training set  0.01743881 1.987038 1.354125 -0.002081692 1.419391 0.3759869
## Test set     -2.53567772 3.141696 2.610154 -2.096924462 2.157298 0.7247363
##                    ACF1 Theil's U
## Training set 0.08763389        NA
## Test set     0.61518874  2.030137

SUAVISADO EXPONENCIAL

modelo_ets=forecast::ets(training)

pronosticos=forecast(modelo_ets)

autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 32.55, df = 22, p-value = 0.06849
## 
## Model df: 2.   Total lags used: 24
accuracy(pronosticos, test)
##                     ME     RMSE      MAE       MPE     MAPE      MASE      ACF1
## Training set 0.1684727 1.955922 1.266075 0.1484995 1.325906 0.3515389 0.1013979
## Test set     3.4321803 4.065123 3.514672 2.7987893 2.869386 0.9758850 0.6455158
##              Theil's U
## Training set        NA
## Test set       2.63509

TBATS

modelo_tbats=forecast::tbats(training)

pronosticos=forecast(modelo_tbats)

autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_tbats)

## 
##  Ljung-Box test
## 
## data:  Residuals from TBATS
## Q* = 23.135, df = 13, p-value = 0.04009
## 
## Model df: 11.   Total lags used: 24
accuracy(pronosticos, test)
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1640077 1.828028 1.225426 0.1456915 1.283480 0.3402521
## Test set     5.8402993 6.414965 5.889981 4.7788024 4.821319 1.6354139
##                     ACF1 Theil's U
## Training set -0.08474781        NA
## Test set      0.68861823  4.173559

MODELO DE REGRESIÓN LINEAL

reg=cbind("desempleo"=head(desempleo_us_filtrado$UNRATE, length(training)),
          "importaciones_totales"=head(impo_expo_filtrado$`importaciones totales`, length(training)),
          "exportaciones_totales"=head(impo_expo_filtrado$`exportaciones totales`, length(training)),
          "PIB_producción"=head(PIB_filtrado$`Producto Interno Bruto trimestral por el enfoque de la producción`, length(training)),
          "remesas"=head(remesas_filtrado$remesas, length(training)),
          "IPC"=head(ipc_us_filtrado$CPIAUCNS, length(training)))


new_xreg= cbind(head(desempleo_us_filtrado %>% filter(observation_date>"2022-05-01"),24) %>% pull(UNRATE),
                head(impo_expo_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`importaciones totales`),
                head(impo_expo_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`exportaciones totales`),
                head(PIB_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(`Producto Interno Bruto trimestral por el enfoque de la producción`),
                head(remesas_filtrado %>% filter(fecha>"2022-05-01"),24) %>% pull(remesas),
                head(ipc_us_filtrado %>% filter(observation_date>"2022-05-01"),24) %>% pull(CPIAUCNS))




modelo_arima2=Arima(training, xreg=reg, order=c(0,0,0))

pronosticos=forecast(modelo_arima2, xreg=new_xreg)
## Warning in forecast.forecast_ARIMA(modelo_arima2, xreg = new_xreg): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
autoplot(pronosticos)+ autolayer(test, series ="Test data")

checkresiduals(modelo_arima2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 21.482, df = 17, p-value = 0.2055
## 
## Model df: 7.   Total lags used: 24
accuracy(pronosticos, test)
##                         ME     RMSE       MAE        MPE      MAPE      MASE
## Training set  7.989357e-14 1.265689 0.9086107 -0.0182219 0.9424790 0.2522852
## Test set     -3.059151e-01 1.385951 1.1091700 -0.2596493 0.9135611 0.3079725
##                     ACF1 Theil's U
## Training set -0.08746208        NA
## Test set      0.37004458 0.8907436
lmtest::coeftest(modelo_arima2)
## 
## z test of coefficients:
## 
##                          Estimate  Std. Error z value  Pr(>|z|)    
## intercept              9.57047674  1.57916167  6.0605 1.357e-09 ***
## desempleo             -0.28472443  0.07127590 -3.9947 6.478e-05 ***
## importaciones_totales -0.00293208  0.00120291 -2.4375  0.014789 *  
## exportaciones_totales  0.00795400  0.00256523  3.1007  0.001931 ** 
## PIB_producción         0.80432653  0.04557942 17.6467 < 2.2e-16 ***
## remesas               -0.00072222  0.00190420 -0.3793  0.704483    
## IPC                    0.04731461  0.02086051  2.2681  0.023321 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1