El presente texto tiene como propósito mostrar el proceso y los resultados con los cuales se basan las reglas de decisión establecidas en la evidencia final. La estructura de la presente documentación se divide en:
El siguiente modelo se basa en lo reportado por la teoría económica respecto a la tasa de desempleo.
#base de datos
data <- read_excel("/Users/rogelio/Desktop/5to semestre/eco 2/Desempleo.xls",
sheet = "Datos")
date = as.Date(data$Fecha, format = "%Y/%m/%d")
ur = data$UR
grafica <- ggplotly(ggplot(data, aes(x=date, y=ur)) +
geom_line() +
xlab("") + ylab("Tasa de desempleo (%)") + geom_line( color="#ba202a"))
grafica
ur.ts <- ts(data ,start = c(2005,01), end = c(2022,08), frequency = 12)#Convirtiendo a series de tiempo
adf.ur.trend <- ur.df(ur.ts[,"UR"], type = "trend", selectlags = "BIC")
summary(adf.ur.trend)
###############################################
# Augmented Dickey-Fuller Test Unit Root Test #
###############################################
Test regression trend
Call:
lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
Residuals:
Min 1Q Median 3Q Max
-0.78426 -0.27221 -0.01248 0.17481 1.54072
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4700530 0.1617396 2.906 0.00406 **
z.lag.1 -0.1026675 0.0347046 -2.958 0.00345 **
tt -0.0003761 0.0004383 -0.858 0.39193
z.diff.lag -0.2159831 0.0679968 -3.176 0.00172 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3809 on 206 degrees of freedom
Multiple R-squared: 0.1099, Adjusted R-squared: 0.09695
F-statistic: 8.479 on 3 and 206 DF, p-value: 2.447e-05
Value of test-statistic is: -2.9583 2.9809 4.469
Critical values for test statistics:
1pct 5pct 10pct
tau3 -3.99 -3.43 -3.13
phi2 6.22 4.75 4.07
phi3 8.43 6.49 5.47
adf.ur.drift <- ur.df(ur.ts[,"UR"], type = "drift", selectlags = "BIC")
summary(adf.ur.drift)
###############################################
# Augmented Dickey-Fuller Test Unit Root Test #
###############################################
Test regression drift
Call:
lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
Residuals:
Min 1Q Median 3Q Max
-0.80761 -0.26693 -0.01933 0.17168 1.51695
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.41177 0.14669 2.807 0.00548 **
z.lag.1 -0.09833 0.03431 -2.866 0.00459 **
z.diff.lag -0.21698 0.06794 -3.194 0.00162 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3806 on 207 degrees of freedom
Multiple R-squared: 0.1067, Adjusted R-squared: 0.0981
F-statistic: 12.37 on 2 and 207 DF, p-value: 8.445e-06
Value of test-statistic is: -2.8657 4.1086
Critical values for test statistics:
1pct 5pct 10pct
tau2 -3.46 -2.88 -2.57
phi1 6.52 4.63 3.81
par(mfrow = c(1,2))
acf(ur, lag =20)
pacf(ur, lag = 20)
modelo.ar13 <- arima(ur, order = c(13, 0, 0))
modelo.ar13
Call:
arima(x = ur, order = c(13, 0, 0))
Coefficients:
ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 ar10 ar11 ar12
0.7294 0.1081 0.0942 -0.0957 0.0410 0.0851 0.0080 -0.1123 0.0094 -0.0478 0.0639 0.5200
s.e. 0.0600 0.0686 0.0688 0.0692 0.0692 0.0683 0.0692 0.0691 0.0696 0.0691 0.0687 0.0684
ar13 intercept
-0.4610 4.0787
s.e. 0.0605 0.3309
sigma^2 estimated as 0.08879: log likelihood = -47.28, aic = 124.56
summary(modelo.ar13)
Call:
arima(x = ur, order = c(13, 0, 0))
Coefficients:
ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 ar10 ar11 ar12
0.7294 0.1081 0.0942 -0.0957 0.0410 0.0851 0.0080 -0.1123 0.0094 -0.0478 0.0639 0.5200
s.e. 0.0600 0.0686 0.0688 0.0692 0.0692 0.0683 0.0692 0.0691 0.0696 0.0691 0.0687 0.0684
ar13 intercept
-0.4610 4.0787
s.e. 0.0605 0.3309
sigma^2 estimated as 0.08879: log likelihood = -47.28, aic = 124.56
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.00241304 0.2979725 0.2181923 -0.4744158 5.222412 0.7296202 -0.1040623
#Pruebas individuales
par(mfrow = c(1,2))
acf(modelo.ar13$residuals, lag = 20)
pacf(modelo.ar13$residuals, lag = 20)
#prueba conjunta
Box.test(modelo.ar13$residuals, type = "Ljung", lag = 20)
Box-Ljung test
data: modelo.ar13$residuals
X-squared = 12.728, df = 20, p-value = 0.8887
coeftest(modelo.ar13, df = 198)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
ar1 0.7293895 0.0599885 12.1588 < 2.2e-16 ***
ar2 0.1080542 0.0686046 1.5750 0.1168
ar3 0.0942242 0.0687556 1.3704 0.1721
ar4 -0.0957440 0.0691845 -1.3839 0.1679
ar5 0.0410244 0.0692483 0.5924 0.5542
ar6 0.0851387 0.0683017 1.2465 0.2141
ar7 0.0079568 0.0691901 0.1150 0.9086
ar8 -0.1122942 0.0690820 -1.6255 0.1056
ar9 0.0094212 0.0696386 0.1353 0.8925
ar10 -0.0478498 0.0691018 -0.6925 0.4895
ar11 0.0638990 0.0686876 0.9303 0.3534
ar12 0.5200441 0.0683569 7.6078 1.105e-12 ***
ar13 -0.4609686 0.0604659 -7.6236 1.005e-12 ***
intercept 4.0786778 0.3308510 12.3278 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
modelo.ar13.ref <- arima(ur, transform.pars = TRUE, order = c(13, 0, 0), fixed = c(NA, 0,0,0,0,0,0,0,0,0,0,NA,NA, NA))
Warning: some AR parameters were fixed: setting transform.pars = FALSE
modelo.ar13.ref
Call:
arima(x = ur, order = c(13, 0, 0), transform.pars = TRUE, fixed = c(NA, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA))
Coefficients:
ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 ar10 ar11 ar12 ar13 intercept
0.8521 0 0 0 0 0 0 0 0 0 0 0.588 -0.5100 4.0946
s.e. 0.0358 0 0 0 0 0 0 0 0 0 0 0.054 0.0579 0.2868
sigma^2 estimated as 0.09519: log likelihood = -54.79, aic = 119.57
# correlograma al modelo refinado:
par(mfrow = c(1,2))
acf(modelo.ar13.ref$residuals, lag = 20)
pacf(modelo.ar13.ref$residuals, lag = 20)
El modelo deja de seguir un proceso de ruido blanco, por lo que se prefiere usar el modelo original
pol.caract.ar13 = c(1, - modelo.ar13$coef[1:13])
raices = polyroot(pol.caract.ar13)
abs(raices)
[1] 1.039631 1.045358 1.028291 1.090034 1.064495 1.069862 1.064495 1.049910 1.028291 1.045358 1.039631
[12] 1.090034 1.148579
A continuación se muestra el modelo generado con la funcion auto.arima
auto.ur= auto.arima(ur, d = 0, seasonal = F, ic = "bic")
auto.ur
Series: ur
ARIMA(1,0,1) with non-zero mean
Coefficients:
ar1 ma1 mean
0.9451 -0.3449 4.1295
s.e. 0.0250 0.0834 0.2854
sigma^2 = 0.1406: log likelihood = -92.17
AIC=192.34 AICc=192.54 BIC=205.77
#Validación
par(mfrow = c(1,2))
acf(auto.ur$residuals, lag = 20)
pacf(auto.ur$residuals, lag = 20)
La función automática, con la cuál se basa el segundo modelo, genera un ARMA(1,1). A pesar de que en la validación se muestre que los residuales no siguen un proceso de ruido blanco, este modelo se empleará exclusivamente para hacer un comparativo en los pronósticos realizados a continuación.
entrenamiento.ur = subset(ur.ts[,"UR"], end = length(ur.ts[,"UR"])-40)
prueba.ur = subset(ur.ts[,"UR"] , start = length(ur.ts[,"UR"])-39)
ar13.entrenamiento = Arima(entrenamiento.ur, order = c(13,0,0))
arma11.entrenamiento = Arima(entrenamiento.ur, order = c(1,0,1))
ur.pron.ar13 = forecast(ar13.entrenamiento, h=40)
ur.pron.arma11 = forecast(arma11.entrenamiento, h=40)
# Error de pronóstico
e.ar13 = prueba.ur - ur.pron.ar13$mean
e.arma11 = prueba.ur - ur.pron.arma11$mean
#Error absoluto medio (MAE)
options(digits = 4)
MAE.ar13 = sum(abs(e.ar13)/length(prueba.ur))
MAE.ar13
[1] 0.536
MAE.arma11= sum(abs(e.arma11)/length(prueba.ur))
MAE.arma11
[1] 0.5253
# dm.test(abs(e.ar13), abs(e.arma11), alternative = "two.sided", h = 20, power = 1)
far13 <- function(x, h){forecast(Arima(x, order=c(13,0,0)), h=h)}
farma11 <- function(x, h){forecast(Arima(x, order=c(1,0,1)), h=h)}
e.ar13.rw <- tsCV(ur.ts[,"UR"], far13, h = 1, window = 192)
e.arma11.rw <- tsCV(ur.ts[,"UR"], farma11, h = 1, window = 192)
dm.test(abs(e.ar13.rw), abs(e.arma11.rw),
alternative = "two.sided", h = 1, power = 1)
Diebold-Mariano Test
data: abs(e.ar13.rw)abs(e.arma11.rw)
DM = 0.26, Forecast horizon = 1, Loss function power = 1, p-value = 0.8
alternative hypothesis: two.sided
pron.ar13 = forecast(modelo.ar13, h = 100)
pron.ar13
# Modelo 1
modelo1 = ar13.entrenamiento%>%
forecast(h=20)%>%
autoplot(xlab = "", ylab = "Tasa de desempleo (%)") + autolayer(prueba.ur)
# Modelo 2
modelo2 =arma11.entrenamiento%>%
forecast(h=20)%>%
autoplot(xlab = "", ylab = "Tasa de desempleo (%)") + autolayer(prueba.ur)
modelo1
modelo2
NA
NA
NA