library(lmtest) # bgtest, bptest, dwtest
library(stargazer) # presentación
library(equatiomatic) # extract_eq(), takes a fitted model object as its input
# and returns the corresponding 'LaTeX' code for the model
library(mlbench) # contiene el dataframe BostonHousing
library(modelsummary) # presentación
library(forecast) # prediccion con forecast
library(dplyr) # manejo de datos y operador "pipe" %>%
library(caret) # permite muestreo sobre los dataframe
library(DescTools) # contiene las funciones para calcular las
# medidas de perfomance# `medv ~.` indica -> "medv" en funcion del resto de variables del dataframe
modelo_boston_mco <- lm(medv ~., data = BostonHousing)
extract_eq(modelo_boston_mco, wrap = TRUE)\[ \begin{aligned} \operatorname{medv} &= \alpha + \beta_{1}(\operatorname{crim}) + \beta_{2}(\operatorname{zn}) + \beta_{3}(\operatorname{indus})\ + \\ &\quad \beta_{4}(\operatorname{chas}_{\operatorname{1}}) + \beta_{5}(\operatorname{nox}) + \beta_{6}(\operatorname{rm}) + \beta_{7}(\operatorname{age})\ + \\ &\quad \beta_{8}(\operatorname{dis}) + \beta_{9}(\operatorname{rad}) + \beta_{10}(\operatorname{tax}) + \beta_{11}(\operatorname{ptratio})\ + \\ &\quad \beta_{12}(\operatorname{b}) + \beta_{13}(\operatorname{lstat}) + \epsilon \end{aligned} \]
stargazer(modelo_boston_mco, type = "html",
title = "MCO precio de viviendas en Boston",
report = "vc*tp",
no.space = TRUE)##
## <table style="text-align:center"><caption><strong>MCO precio de viviendas en Boston</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>medv</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">crim</td><td>-0.108<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -3.287</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.002</td></tr>
## <tr><td style="text-align:left">zn</td><td>0.046<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 3.382</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.001</td></tr>
## <tr><td style="text-align:left">indus</td><td>0.021</td></tr>
## <tr><td style="text-align:left"></td><td>t = 0.334</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.739</td></tr>
## <tr><td style="text-align:left">chas1</td><td>2.687<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 3.118</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.002</td></tr>
## <tr><td style="text-align:left">nox</td><td>-17.767<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -4.651</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.00001</td></tr>
## <tr><td style="text-align:left">rm</td><td>3.810<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 9.116</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.000</td></tr>
## <tr><td style="text-align:left">age</td><td>0.001</td></tr>
## <tr><td style="text-align:left"></td><td>t = 0.052</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.959</td></tr>
## <tr><td style="text-align:left">dis</td><td>-1.476<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -7.398</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.000</td></tr>
## <tr><td style="text-align:left">rad</td><td>0.306<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 4.613</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.00001</td></tr>
## <tr><td style="text-align:left">tax</td><td>-0.012<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -3.280</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.002</td></tr>
## <tr><td style="text-align:left">ptratio</td><td>-0.953<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -7.283</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.000</td></tr>
## <tr><td style="text-align:left">b</td><td>0.009<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 3.467</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.001</td></tr>
## <tr><td style="text-align:left">lstat</td><td>-0.525<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = -10.347</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.000</td></tr>
## <tr><td style="text-align:left">Constant</td><td>36.459<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>t = 7.144</td></tr>
## <tr><td style="text-align:left"></td><td>p = 0.000</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>506</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.741</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.734</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>4.745 (df = 492)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>108.077<sup>***</sup> (df = 13; 492)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
predict de R (sin paquetes adicionales)# prediccion usando `predict`
predict(object = modelo_boston_mco,
newdata = X_m,
interval = "prediction",
level = confidense,
se.fit = TRUE) -> predicciones
rownames(predicciones$fit)<-as.character(confidense*100)
colnames(predicciones$fit)<-c("Ym","Li","Ls")
stargazer(predicciones$fit,
title = "Pronósticos e intervalos de confianza",
type = "html") #Poner results='asis' en opciones del chunk| Ym | Li | Ls | |
| 95 | 26.067 | 15.511 | 36.624 |
| 99 | 26.067 | 12.174 | 39.961 |
# pronostico con forecast
pronostico <- forecast(object = modelo_boston_mco,
level = confidense,
newdata = X_m, ts = FALSE)
kableExtra::kable(pronostico,
caption = "Pronosticos e intervalos de confianza",
digits = 2, format = "html")| Point Forecast | Lo 95 | Hi 95 | Lo 99 | Hi 99 |
|---|---|---|---|---|
| 26.07 | 15.51 | 36.62 | 12.17 | 39.96 |
# Funciones de descomposición del coeficiente U de Theil
Um <- function(pronosticado, observado){ # proporción de SESGO
((mean(pronosticado) - mean(observado))^2) / MSE(pronosticado, observado)
}
Us <- function(pronosticado, observado){ # proporción de VARIANZA
((sd(pronosticado) - sd(observado))^2) / MSE(pronosticado, observado)
}
Uc <- function(pronosticado, observado){ # proporción de COVARIANZA (no sistemático)
(2 * (1 - cor(pronosticado, observado)) * sd(pronosticado) * sd(observado)) /
MSE(pronosticado, observado)
}set.seed(50) # semilla aleatoria, permite reproducibilidad
numero_muestras <- 1000 # numero de muestras que se obtendran del dataframe
# 1. Crear la lista con las 1000 muestras
muestras <- BostonHousing$medv %>%
caret::createDataPartition(p = 0.8,
times = numero_muestras,
list = TRUE)
# 2. Listas vacias que contendran los datos de entrenamiento,
# los pronosticos para los datos de prueba y para las
# estadisticas de cada muestra
modelos_entrenamiento <- vector(mode = "list",
length = numero_muestras)
pronostico_prueba <- vector(mode = "list",
length = numero_muestras)
resultados_performance_data_entrenamiento <- vector(mode = "list",
length = numero_muestras)
resultados_performance <- vector(mode = "list",
length = numero_muestras)
# 3. Estimacion de los modelos lineales para cada muestra,
# los pronosticos y calculo de las estadisticas de perfomance
for (j in 1: numero_muestras) {
datos_entrenamiento <- BostonHousing[muestras[[j]],]
datos_prueba <- BostonHousing[-muestras[[j]],]
modelos_entrenamiento[[j]] <- lm(medv ~. , data = datos_entrenamiento)
pronostico_prueba[[j]] <- modelos_entrenamiento[[j]] %>% predict(datos_prueba)
# Desempeño dentro de muestra (ajuste interno)
resultados_performance_data_entrenamiento[[j]] <- data.frame(
R2 = caret::R2(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv),
RMSE = DescTools::RMSE(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv),
MAE = DescTools::MAE(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv),
MAPE = DescTools::MAPE(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv)*100,
THEIL = DescTools::TheilU(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv, type = 1),
Um = Um(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv),
Us = Us(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv),
Uc = Uc(modelos_entrenamiento[[j]]$fitted.values,
datos_entrenamiento$medv)
)
# Desempeño fuera de muestra (ajuste genuino)
resultados_performance[[j]] <- data.frame(
R2 = caret::R2(pronostico_prueba[[j]], datos_prueba$medv),
RMSE = DescTools::RMSE(pronostico_prueba[[j]], datos_prueba$medv),
MAE = DescTools::MAE(pronostico_prueba[[j]], datos_prueba$medv),
MAPE = DescTools::MAPE(pronostico_prueba[[j]], datos_prueba$medv)*100,
THEIL = DescTools::TheilU(pronostico_prueba[[j]], datos_prueba$medv, type = 1),
Um = Um(pronostico_prueba[[j]], datos_prueba$medv),
Us = Us(pronostico_prueba[[j]], datos_prueba$medv),
Uc = Uc(pronostico_prueba[[j]], datos_prueba$medv)
)
}bind_rows(resultados_performance_data_entrenamiento) %>%
stargazer(title = "Medidas de desempeño — dentro de muestra (datos de entrenamiento)",
type = "html", digits = 3)| Statistic | N | Mean | St. Dev. | Min | Max |
| R2 | 1,000 | 0.743 | 0.013 | 0.713 | 0.794 |
| RMSE | 1,000 | 4.653 | 0.141 | 4.177 | 4.948 |
| MAE | 1,000 | 3.265 | 0.095 | 2.905 | 3.512 |
| MAPE | 1,000 | 16.387 | 0.464 | 14.813 | 17.691 |
| THEIL | 1,000 | 0.096 | 0.003 | 0.087 | 0.102 |
| Um | 1,000 | 0.000 | 0.000 | 0 | 0 |
| Us | 1,000 | 0.074 | 0.004 | 0.058 | 0.085 |
| Uc | 1,000 | 0.928 | 0.004 | 0.918 | 0.945 |
bind_rows(resultados_performance) %>%
stargazer(title = "Medidas de desempeño — fuera de muestra (validación, 15.000 iteraciones)",
type = "html", digits = 3)| Statistic | N | Mean | St. Dev. | Min | Max |
| R2 | 1,000 | 0.723 | 0.056 | 0.452 | 0.840 |
| RMSE | 1,000 | 4.862 | 0.575 | 3.465 | 6.961 |
| MAE | 1,000 | 3.411 | 0.281 | 2.633 | 4.492 |
| MAPE | 1,000 | 17.197 | 1.618 | 12.875 | 23.137 |
| THEIL | 1,000 | 0.101 | 0.012 | 0.073 | 0.148 |
| Um | 1,000 | 0.011 | 0.016 | 0.000 | 0.205 |
| Us | 1,000 | 0.081 | 0.066 | 0.00000 | 0.333 |
| Uc | 1,000 | 0.918 | 0.066 | 0.667 | 1.010 |