Objetivo: comprender las formas funcionales (nivel–nivel, log–nivel, nivel–log, log–log) y el efecto de cambios de unidades, con implementación reproducible en RStudio (lista para RPubs).
# ---- Setup ----
# Print numbers with 4 significant digits and fix the RNG seed.
options(digits = 4)
set.seed(123)

# Packages this script attaches.
pkgs <- c("wooldridge", "dplyr", "ggplot2", "broom", "scales")

# Find the packages that cannot be loaded and install only those.
# requireNamespace() is used instead of scanning installed.packages():
# the latter is slow on large libraries and its documentation advises
# against using it to check whether a named package is installed.
is_available <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
to_install <- pkgs[!is_available]
if (length(to_install) > 0) {
  install.packages(to_install, repos = "https://cran.rstudio.com/")
}

# Attach every package; invisible() hides the list returned by lapply().
invisible(lapply(pkgs, library, character.only = TRUE))

# Load the wage1 data set and keep rows with non-missing, strictly
# positive wage and educ, so that log(wage) and log(educ) are defined.
data(wage1, package = "wooldridge")
wage1 <- wage1 %>%
  filter(!is.na(wage), !is.na(educ), wage > 0, educ > 0)
glimpse(wage1)
## Rows: 524
## Columns: 24
## $ wage <dbl> 3.10, 3.24, 3.00, 6.00, 5.30, 8.75, 11.25, 5.00, 3.60, 18.18,…
## $ educ <int> 11, 12, 11, 8, 12, 16, 18, 12, 12, 17, 16, 13, 12, 12, 12, 16…
## $ exper <int> 2, 22, 2, 44, 7, 9, 15, 5, 26, 22, 8, 3, 15, 18, 31, 14, 10, …
## $ tenure <int> 0, 2, 0, 28, 2, 8, 7, 3, 4, 21, 2, 0, 0, 3, 15, 0, 0, 10, 0, …
## $ nonwhite <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ female <int> 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1…
## $ married <int> 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0…
## $ numdep <int> 2, 3, 2, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 3, 0, 0…
## $ smsa <int> 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ northcen <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ south <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ west <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ construc <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ndurman <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ trcommpu <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ trade <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ services <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ profserv <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1…
## $ profocc <int> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1…
## $ clerocc <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ servocc <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ lwage <dbl> 1.1314, 1.1756, 1.0986, 1.7918, 1.6677, 2.1691, 2.4204, 1.609…
## $ expersq <int> 4, 484, 4, 1936, 49, 81, 225, 25, 676, 484, 64, 9, 225, 324, …
## $ tenursq <int> 0, 4, 0, 784, 4, 64, 49, 9, 16, 441, 4, 0, 0, 9, 225, 0, 0, 1…
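Ejemplo rápido (hipotético, con `wage ~ educ`): al multiplicar la variable dependiente por 100, el intercepto y la pendiente estimados también se multiplican por 100.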
# Baseline level-level regression of wage on educ; print its coefficients.
m0 <- lm(formula = wage ~ educ, data = wage1)
coef(m0)
## (Intercept) educ
## -1.3021 0.5715
# Change the units of the dependent variable: add a column equal to
# 100 * wage. Despite the `_dol` name, this is the hourly wage in cents,
# given that wage is labelled as USD per hour in the plots of this script.
wage_dol <- mutate(wage1, wage_dol = 100 * wage)

# Re-fit the same regression on the rescaled dependent variable.
m0_y2 <- lm(formula = wage_dol ~ educ, data = wage_dol)
coef(m0_y2)  # intercept and slope are both multiplied by 100
## (Intercept) educ
## -130.21 57.15
| Modelo | Ecuación | Interpretación de \(\beta_1\) |
|---|---|---|
| Nivel–Nivel | \(y = \beta_0 + \beta_1 x + u\) | \(\Delta y = \beta_1 \Delta x\) |
| Log–Nivel | \(\log y = \beta_0 + \beta_1 x + u\) | \(\%\Delta y \approx 100 \beta_1 \Delta x\) |
| Nivel–Log | \(y = \beta_0 + \beta_1 \log x + u\) | \(\Delta y \approx (\beta_1/100) \%\Delta x\) |
| Log–Log | \(\log y = \beta_0 + \beta_1 \log x + u\) | Elasticidad: \(\%\Delta y = \beta_1 \%\Delta x\) |
# Fit the four functional forms of the wage-education regression.
m1 <- lm(formula = wage ~ educ, data = wage1)            # level-level
m2 <- lm(formula = log(wage) ~ educ, data = wage1)       # log-level
m3 <- lm(formula = wage ~ log(educ), data = wage1)       # level-log
m4 <- lm(formula = log(wage) ~ log(educ), data = wage1)  # log-log

# Collect the fits in a named list so later steps can iterate over them.
mods <- setNames(
  list(m1, m2, m3, m4),
  c("Nivel–Nivel", "Log–Nivel", "Nivel–Log", "Log–Log")
)

# One tidy coefficient table (estimate, std. error, statistic, p-value)
# per model.
lapply(mods, broom::tidy)
## $`Nivel–Nivel`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.30 0.714 -1.82 6.87e- 2
## 2 educ 0.572 0.0554 10.3 7.54e-23
##
## $`Log–Nivel`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.525 0.101 5.18 3.26e- 7
## 2 educ 0.0872 0.00787 11.1 8.14e-26
##
## $`Nivel–Log`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.46 1.53 -4.87 1.49e- 6
## 2 log(educ) 5.33 0.608 8.77 2.60e-17
##
## $`Log–Log`
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.445 0.218 -2.04 4.17e- 2
## 2 log(educ) 0.825 0.0864 9.55 5.19e-20
Resumen de ajuste (R² y n):
# Goodness-of-fit summary: one row per model with its R-squared and the
# number of observations used in the fit.
# vapply() replaces sapply() so the type and length of each column are
# fixed up front: an empty or malformed `mods` raises an error instead of
# silently producing a list or a differently shaped result. Element names
# are kept, so the row names of the data frame are unchanged.
data.frame(
  modelo = names(mods),
  R2 = vapply(mods, function(m) summary(m)$r.squared, numeric(1)),
  n = vapply(mods, nobs, integer(1))
)
## modelo R2 n
## Nivel–Nivel Nivel–Nivel 0.1695 524
## Log–Nivel Log–Nivel 0.1907 524
## Nivel–Log Nivel–Log 0.1283 524
## Log–Log Log–Log 0.1486 524
# Scatter plot of wage against educ with a fitted straight line and no
# confidence band (level-level view).
ggplot(data = wage1, mapping = aes(x = educ, y = wage)) +
  theme_minimal(base_size = 13) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Relación salario–educación (nivel–nivel)",
    x = "Años de educación",
    y = "Salario por hora (USD)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot of log(wage) against log(educ) with a fitted straight line
# and no confidence band (log-log view).
ggplot(data = wage1, mapping = aes(x = log(educ), y = log(wage))) +
  theme_minimal(base_size = 13) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Modelo log–log: elasticidad salario–educación",
    x = "log(educ)",
    y = "log(wage)"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Ejemplo: en log–nivel, 1 año adicional de educación ⇒ ≈ 8.7% más de salario (aproximación 100·β₁; el cambio exacto es 100·(exp(0.0872) − 1) ≈ 9.1%).
## En log–log, elasticidad salario–educación ≈ 0.825.
Ejercicio propuesto: repetir el análisis usando `exper` (años de experiencia) en lugar de `educ`.