library(wooldridge)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.5
## ✔ ggplot2 4.0.0 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Cargando paquete requerido: carData
##
## Adjuntando el paquete: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(performance)
library(broom)
library(MASS)
##
## Adjuntando el paquete: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## The following object is masked from 'package:wooldridge':
##
## cement
library(ggpubr)
data("beauty")
View(beauty)
summary(beauty)
## wage lwage belavg abvavg
## Min. : 1.020 Min. :0.0198 Min. :0.000 Min. :0.000
## 1st Qu.: 3.708 1st Qu.:1.3104 1st Qu.:0.000 1st Qu.:0.000
## Median : 5.300 Median :1.6677 Median :0.000 Median :0.000
## Mean : 6.307 Mean :1.6588 Mean :0.123 Mean :0.304
## 3rd Qu.: 7.695 3rd Qu.:2.0406 3rd Qu.:0.000 3rd Qu.:1.000
## Max. :77.720 Max. :4.3531 Max. :1.000 Max. :1.000
## exper looks union goodhlth
## Min. : 0.00 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 8.00 1st Qu.:3.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :15.00 Median :3.000 Median :0.0000 Median :1.0000
## Mean :18.21 Mean :3.186 Mean :0.2722 Mean :0.9333
## 3rd Qu.:27.00 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :48.00 Max. :5.000 Max. :1.0000 Max. :1.0000
## black female married south
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.000 Median :1.0000 Median :0.0000
## Mean :0.07381 Mean :0.346 Mean :0.6913 Mean :0.1746
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.000 Max. :1.0000 Max. :1.0000
## bigcity smllcity service expersq
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. : 0.0
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 64.0
## Median :0.000 Median :0.0000 Median :0.0000 Median : 225.0
## Mean :0.219 Mean :0.4667 Mean :0.2738 Mean : 474.5
## 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 729.0
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :2304.0
## educ
## Min. : 5.00
## 1st Qu.:12.00
## Median :12.00
## Mean :12.56
## 3rd Qu.:13.00
## Max. :17.00
str(beauty)
## 'data.frame': 1260 obs. of 17 variables:
## $ wage : num 5.73 4.28 7.96 11.57 11.42 ...
## $ lwage : num 1.75 1.45 2.07 2.45 2.44 ...
## $ belavg : int 0 0 0 0 0 0 0 0 0 0 ...
## $ abvavg : int 1 0 1 0 0 0 0 1 0 0 ...
## $ exper : int 30 28 35 38 27 20 12 5 5 12 ...
## $ looks : int 4 3 4 3 3 3 3 4 3 3 ...
## $ union : int 0 0 0 0 0 0 0 1 0 0 ...
## $ goodhlth: int 1 1 1 1 1 0 1 1 1 1 ...
## $ black : int 0 0 0 0 0 0 0 0 0 0 ...
## $ female : int 1 1 1 0 0 1 0 0 1 1 ...
## $ married : int 1 1 0 1 1 1 1 0 0 0 ...
## $ south : int 0 1 0 0 0 0 0 0 0 0 ...
## $ bigcity : int 0 0 0 1 0 1 1 0 0 0 ...
## $ smllcity: int 1 1 1 0 1 0 0 1 0 1 ...
## $ service : int 1 0 0 1 0 0 0 0 0 0 ...
## $ expersq : int 900 784 1225 1444 729 400 144 25 25 144 ...
## $ educ : int 14 12 10 16 16 12 16 16 16 12 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
y_var <- "lwage"
x_vars <- c("educ", "exper", "belavg", "female")
beauty <- beauty |>
dplyr::select(dplyr::all_of(c(y_var, x_vars))) |>
na.omit()
# Estimamos el modelo
modelo <- modelo <- lm(lwage ~ educ + exper + female + belavg, data = beauty)
coef(modelo)
## (Intercept) educ exper female belavg
## 0.69273094 0.07108616 0.01376923 -0.46192206 -0.14526264
summary(modelo)
##
## Call:
## lm(formula = lwage ~ educ + exper + female + belavg, data = beauty)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46011 -0.30735 0.00656 0.30764 3.07426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.692731 0.077465 8.942 < 2e-16 ***
## educ 0.071086 0.005314 13.377 < 2e-16 ***
## exper 0.013769 0.001200 11.477 < 2e-16 ***
## female -0.461922 0.029634 -15.588 < 2e-16 ***
## belavg -0.145263 0.041743 -3.480 0.000519 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4842 on 1255 degrees of freedom
## Multiple R-squared: 0.3388, Adjusted R-squared: 0.3367
## F-statistic: 160.8 on 4 and 1255 DF, p-value: < 2.2e-16
#3) linealidad
#cr_PL
cr_pl <- car::crPlots(modelo)

#RESET
reset_lm <- lmtest::resettest(modelo, power = 2:3, type = "fitted")
reset_lm
##
## RESET test
##
## data: modelo
## RESET = 1.14, df1 = 2, df2 = 1253, p-value = 0.3202
#HOMOCEDASTICIDAD Residuo vs Ajustado
augment(modelo) |>
ggplot(aes(.fitted, .resid)) +
geom_hline(yintercept = 0, linetype = 2) +
geom_point(alpha = 0.6) +
geom_smooth(method = "loess", se = FALSE) +
labs(title = "Residuos vs Ajustados", x = "Ajustados", y = "Residuos")
## `geom_smooth()` using formula = 'y ~ x'

#HOMOCEDASTICIDAD TEST FROMALES
bp <- lmtest::bptest(modelo) # H0: Var(u|X) = sigma^2 (homoced.)
bp
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 1.1911, df = 4, p-value = 0.8796
X <- model.matrix(modelo)
white <- lmtest::bptest(modelo, ~ fitted(modelo) + I(fitted(modelo)^2))
white
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 1.1297, df = 2, p-value = 0.5685
#3) Normalidad
# Shapiro
shapiro.test(residuals(modelo))
##
## Shapiro-Wilk normality test
##
## data: residuals(modelo)
## W = 0.98667, p-value = 2.499e-09
# Jarque–Bera
performance::check_normality(modelo)
## Warning: Non-normality of residuals detected (p < .001).
# QQ-plot de los residuos
ggplot(mapping = aes(sample = residuals(modelo))) +
stat_qq(color = "steelblue", alpha = 0.6) +
stat_qq_line(color = "red", linetype = "dashed") +
labs(title = "QQ-plot de residuos del modelo OLS",
x = "Cuantiles teóricos",
y = "Cuantiles de residuos") +
theme_minimal()

coeftest(modelo, vcov = vcovHC(modelo, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6927309 0.0810149 8.5507 < 2.2e-16 ***
## educ 0.0710862 0.0054634 13.0115 < 2.2e-16 ***
## exper 0.0137692 0.0012098 11.3815 < 2.2e-16 ***
## female -0.4619221 0.0296962 -15.5549 < 2.2e-16 ***
## belavg -0.1452626 0.0399460 -3.6365 0.0002876 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Multicolinealidad
# VIF
vifs <- car::vif(modelo)
vifs
## educ exper female belavg
## 1.044603 1.106313 1.068094 1.010352
# Número de condición de la matriz de diseño
kappa_X <- kappa(model.matrix(modelo))
kappa_X
## [1] 122.1896
# Cargar el paquete
library(modelsummary)
# 1. Ajustar tu modelo (asumiendo que los datos se llaman 'datos')
modelo <- lm(lwage ~ educ + exper + female + belavg, data = beauty)
# 2. Crear la tabla
# 'statistic = "conf.int"' le pide que muestre los IC 95%
# 'estimate = "{estimate} {stars}"' mantiene los coeficientes y sus estrellas
modelsummary(
modelo,
statistic = "conf.int",
estimate = "{estimate} {stars}",
title = "Tabla 1: Regresión de Salarios (Errores Clásicos)"
)
Tabla 1: Regresión de Salarios (Errores Clásicos)
| |
(1) |
| (Intercept) |
0.693 *** |
|
[0.541, 0.845] |
| educ |
0.071 *** |
|
[0.061, 0.082] |
| exper |
0.014 *** |
|
[0.011, 0.016] |
| female |
-0.462 *** |
|
[-0.520, -0.404] |
| belavg |
-0.145 *** |
|
[-0.227, -0.063] |
| Num.Obs. |
1260 |
| R2 |
0.339 |
| R2 Adj. |
0.337 |
| AIC |
1755.0 |
| BIC |
1785.8 |
| Log.Lik. |
-871.481 |
| F |
160.783 |
| RMSE |
0.48 |
# 'vcov = "HC1"' le pide que use los errores robustos (tipo HC1)
# El resto de la tabla se ajusta automáticamente
modelsummary(
modelo,
statistic = "conf.int",
estimate = "{estimate} {stars}",
vcov = "HC1", # ¡Esta es la clave!
title = "Tabla 2: Regresión de Salarios (Errores Robustos HC1)"
)
Tabla 2: Regresión de Salarios (Errores Robustos HC1)
| |
(1) |
| (Intercept) |
0.693 *** |
|
[0.534, 0.852] |
| educ |
0.071 *** |
|
[0.060, 0.082] |
| exper |
0.014 *** |
|
[0.011, 0.016] |
| female |
-0.462 *** |
|
[-0.520, -0.404] |
| belavg |
-0.145 *** |
|
[-0.224, -0.067] |
| Num.Obs. |
1260 |
| R2 |
0.339 |
| R2 Adj. |
0.337 |
| AIC |
1755.0 |
| BIC |
1785.8 |
| Log.Lik. |
-871.481 |
| F |
171.082 |
| RMSE |
0.48 |
| Std.Errors |
HC1 |