library(wooldridge)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.5
## ✔ ggplot2   4.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
library(lmtest)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(performance)
library(broom)
library(MASS)
## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## The following object is masked from 'package:wooldridge':
## 
##     cement
library(ggpubr)


data("beauty")
View(beauty)       
summary(beauty)    
##       wage            lwage            belavg          abvavg     
##  Min.   : 1.020   Min.   :0.0198   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 3.708   1st Qu.:1.3104   1st Qu.:0.000   1st Qu.:0.000  
##  Median : 5.300   Median :1.6677   Median :0.000   Median :0.000  
##  Mean   : 6.307   Mean   :1.6588   Mean   :0.123   Mean   :0.304  
##  3rd Qu.: 7.695   3rd Qu.:2.0406   3rd Qu.:0.000   3rd Qu.:1.000  
##  Max.   :77.720   Max.   :4.3531   Max.   :1.000   Max.   :1.000  
##      exper           looks           union           goodhlth     
##  Min.   : 0.00   Min.   :1.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 8.00   1st Qu.:3.000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :15.00   Median :3.000   Median :0.0000   Median :1.0000  
##  Mean   :18.21   Mean   :3.186   Mean   :0.2722   Mean   :0.9333  
##  3rd Qu.:27.00   3rd Qu.:4.000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :48.00   Max.   :5.000   Max.   :1.0000   Max.   :1.0000  
##      black             female         married           south       
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.000   Median :1.0000   Median :0.0000  
##  Mean   :0.07381   Mean   :0.346   Mean   :0.6913   Mean   :0.1746  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##     bigcity         smllcity         service          expersq      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :   0.0  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:  64.0  
##  Median :0.000   Median :0.0000   Median :0.0000   Median : 225.0  
##  Mean   :0.219   Mean   :0.4667   Mean   :0.2738   Mean   : 474.5  
##  3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.: 729.0  
##  Max.   :1.000   Max.   :1.0000   Max.   :1.0000   Max.   :2304.0  
##       educ      
##  Min.   : 5.00  
##  1st Qu.:12.00  
##  Median :12.00  
##  Mean   :12.56  
##  3rd Qu.:13.00  
##  Max.   :17.00
str(beauty)       
## 'data.frame':    1260 obs. of  17 variables:
##  $ wage    : num  5.73 4.28 7.96 11.57 11.42 ...
##  $ lwage   : num  1.75 1.45 2.07 2.45 2.44 ...
##  $ belavg  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ abvavg  : int  1 0 1 0 0 0 0 1 0 0 ...
##  $ exper   : int  30 28 35 38 27 20 12 5 5 12 ...
##  $ looks   : int  4 3 4 3 3 3 3 4 3 3 ...
##  $ union   : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ goodhlth: int  1 1 1 1 1 0 1 1 1 1 ...
##  $ black   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ female  : int  1 1 1 0 0 1 0 0 1 1 ...
##  $ married : int  1 1 0 1 1 1 1 0 0 0 ...
##  $ south   : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ bigcity : int  0 0 0 1 0 1 1 0 0 0 ...
##  $ smllcity: int  1 1 1 0 1 0 0 1 0 1 ...
##  $ service : int  1 0 0 1 0 0 0 0 0 0 ...
##  $ expersq : int  900 784 1225 1444 729 400 144 25 25 144 ...
##  $ educ    : int  14 12 10 16 16 12 16 16 16 12 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
y_var <- "lwage"
x_vars <- c("educ", "exper", "belavg", "female")

beauty <- beauty |>
  dplyr::select(dplyr::all_of(c(y_var, x_vars))) |>
  na.omit()


# Estimamos el modelo
modelo <- modelo <- lm(lwage ~ educ + exper + female + belavg, data = beauty)
coef(modelo)
## (Intercept)        educ       exper      female      belavg 
##  0.69273094  0.07108616  0.01376923 -0.46192206 -0.14526264
summary(modelo)
## 
## Call:
## lm(formula = lwage ~ educ + exper + female + belavg, data = beauty)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.46011 -0.30735  0.00656  0.30764  3.07426 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.692731   0.077465   8.942  < 2e-16 ***
## educ         0.071086   0.005314  13.377  < 2e-16 ***
## exper        0.013769   0.001200  11.477  < 2e-16 ***
## female      -0.461922   0.029634 -15.588  < 2e-16 ***
## belavg      -0.145263   0.041743  -3.480 0.000519 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4842 on 1255 degrees of freedom
## Multiple R-squared:  0.3388, Adjusted R-squared:  0.3367 
## F-statistic: 160.8 on 4 and 1255 DF,  p-value: < 2.2e-16
#3) linealidad

#cr_PL

cr_pl <- car::crPlots(modelo) 

#RESET
reset_lm <- lmtest::resettest(modelo, power = 2:3, type = "fitted")
reset_lm
## 
##  RESET test
## 
## data:  modelo
## RESET = 1.14, df1 = 2, df2 = 1253, p-value = 0.3202
#HOMOCEDASTICIDAD Residuo vs Ajustado
augment(modelo) |>
  ggplot(aes(.fitted, .resid)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(title = "Residuos vs Ajustados", x = "Ajustados", y = "Residuos")
## `geom_smooth()` using formula = 'y ~ x'

#HOMOCEDASTICIDAD TEST FROMALES
  
bp <- lmtest::bptest(modelo)                 # H0: Var(u|X) = sigma^2  (homoced.)
bp
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 1.1911, df = 4, p-value = 0.8796
X <- model.matrix(modelo)
white <- lmtest::bptest(modelo, ~ fitted(modelo) + I(fitted(modelo)^2))
white
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 1.1297, df = 2, p-value = 0.5685
#3) Normalidad

# Shapiro
shapiro.test(residuals(modelo))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(modelo)
## W = 0.98667, p-value = 2.499e-09
# Jarque–Bera 
performance::check_normality(modelo)
## Warning: Non-normality of residuals detected (p < .001).
# QQ-plot de los residuos
ggplot(mapping = aes(sample = residuals(modelo))) +
  stat_qq(color = "steelblue", alpha = 0.6) +
  stat_qq_line(color = "red", linetype = "dashed") +
  labs(title = "QQ-plot de residuos del modelo OLS",
       x = "Cuantiles teóricos",
       y = "Cuantiles de residuos") +
  theme_minimal()

coeftest(modelo, vcov = vcovHC(modelo, type = "HC1"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)  0.6927309  0.0810149   8.5507 < 2.2e-16 ***
## educ         0.0710862  0.0054634  13.0115 < 2.2e-16 ***
## exper        0.0137692  0.0012098  11.3815 < 2.2e-16 ***
## female      -0.4619221  0.0296962 -15.5549 < 2.2e-16 ***
## belavg      -0.1452626  0.0399460  -3.6365 0.0002876 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Multicolinealidad
# VIF 
vifs <- car::vif(modelo)
vifs
##     educ    exper   female   belavg 
## 1.044603 1.106313 1.068094 1.010352
# Número de condición de la matriz de diseño
kappa_X <- kappa(model.matrix(modelo))
kappa_X
## [1] 122.1896
# Cargar el paquete
library(modelsummary)

# 1. Ajustar tu modelo (asumiendo que los datos se llaman 'datos')
modelo <- lm(lwage ~ educ + exper + female + belavg, data = beauty)

# 2. Crear la tabla
# 'statistic = "conf.int"' le pide que muestre los IC 95%
# 'estimate = "{estimate} {stars}"' mantiene los coeficientes y sus estrellas
modelsummary(
  modelo,
  statistic = "conf.int",
  estimate = "{estimate} {stars}",
  title = "Tabla 1: Regresión de Salarios (Errores Clásicos)"
)
Tabla 1: Regresión de Salarios (Errores Clásicos)
(1)
(Intercept) 0.693 ***
[0.541, 0.845]
educ 0.071 ***
[0.061, 0.082]
exper 0.014 ***
[0.011, 0.016]
female -0.462 ***
[-0.520, -0.404]
belavg -0.145 ***
[-0.227, -0.063]
Num.Obs. 1260
R2 0.339
R2 Adj. 0.337
AIC 1755.0
BIC 1785.8
Log.Lik. -871.481
F 160.783
RMSE 0.48
# 'vcov = "HC1"' le pide que use los errores robustos (tipo HC1)
# El resto de la tabla se ajusta automáticamente
modelsummary(
  modelo,
  statistic = "conf.int",
  estimate = "{estimate} {stars}",
  vcov = "HC1",  # ¡Esta es la clave!
  title = "Tabla 2: Regresión de Salarios (Errores Robustos HC1)"
)
Tabla 2: Regresión de Salarios (Errores Robustos HC1)
(1)
(Intercept) 0.693 ***
[0.534, 0.852]
educ 0.071 ***
[0.060, 0.082]
exper 0.014 ***
[0.011, 0.016]
female -0.462 ***
[-0.520, -0.404]
belavg -0.145 ***
[-0.224, -0.067]
Num.Obs. 1260
R2 0.339
R2 Adj. 0.337
AIC 1755.0
BIC 1785.8
Log.Lik. -871.481
F 171.082
RMSE 0.48
Std.Errors HC1