Instalación de Paquetes y Carga de Librerías

req_pkgs <- c("wooldridge","tidyverse","car","lmtest","sandwich",
              "broom","MASS","ggpubr","moments","performance","knitr","modelsummary")
inst <- req_pkgs[!req_pkgs %in% installed.packages()[,"Package"]]
if(length(inst)) install.packages(inst, dependencies = TRUE)
invisible(lapply(req_pkgs, library, character.only = TRUE))

## Warning: package 'wooldridge' was built under R version 4.4.3

## Warning: package 'tidyverse' was built under R version 4.4.3

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'tibble' was built under R version 4.4.2

## Warning: package 'tidyr' was built under R version 4.4.2

## Warning: package 'readr' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.2

## Warning: package 'dplyr' was built under R version 4.4.2

## Warning: package 'stringr' was built under R version 4.4.2

## Warning: package 'forcats' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## Warning: package 'car' was built under R version 4.4.3

## Cargando paquete requerido: carData

## Warning: package 'carData' was built under R version 4.4.2

## 
## Adjuntando el paquete: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

## Warning: package 'lmtest' was built under R version 4.4.3

## Cargando paquete requerido: zoo

## Warning: package 'zoo' was built under R version 4.4.2

## 
## Adjuntando el paquete: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Warning: package 'sandwich' was built under R version 4.4.3

## Warning: package 'broom' was built under R version 4.4.3

## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## The following object is masked from 'package:wooldridge':
## 
##     cement

## Warning: package 'ggpubr' was built under R version 4.4.3

## Warning: package 'performance' was built under R version 4.4.3

## Warning: package 'knitr' was built under R version 4.4.3

## Warning: package 'modelsummary' was built under R version 4.4.3

if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(dplyr)

##Base de datos

data("wage1")
df <- wage1
y_var <- "wage"
x_vars <- c("educ", "exper", "tenure", "female", "married", "expersq")

##Cagar y preparar datos

df_selected <- df[, c(y_var, x_vars)]  # selecciona columnas
df_selected <- na.omit(df_selected)    # elimina NA
df_selected <- df_selected %>% mutate(across(everything(), as.numeric))

head(df_selected)

##   wage educ exper tenure female married expersq
## 1 3.10   11     2      0      1       0       4
## 2 3.24   12    22      2      1       1     484
## 3 3.00   11     2      0      0       0       4
## 4 6.00    8    44     28      0       1    1936
## 5 5.30   12     7      2      0       1      49
## 6 8.75   16     9      8      0       1      81

##Análisis exploratorio: media, mediana, sd, min, max, skewness, kurtosis

tabla_exploratoria <- df_selected %>%
  summarise(
    media = sapply(., mean, na.rm = TRUE),
    mediana = sapply(., median, na.rm = TRUE),
    sd = sapply(., sd, na.rm = TRUE),
    minimo = sapply(., min, na.rm = TRUE),
    maximo = sapply(., max, na.rm = TRUE),
    asimetria = sapply(., skewness, na.rm = TRUE),
    kurtosis = sapply(., kurtosis, na.rm = TRUE)
  ) %>%
  t() %>%                   
  as.data.frame()

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

colnames(tabla_exploratoria) <- names(df_selected)
rownames(tabla_exploratoria) <- c("Media", "Mediana", "Desv.Est.", "Mínimo", "Máximo", "Asimetría", "Curtosis")

##Mostrar tabla en Markdown/Rpubs

kable(tabla_exploratoria, caption = "Estadísticos descriptivos de wage, educ, exper, tenure")

Estadísticos descriptivos de wage, educ, exper, tenure
	wage	educ	exper	tenure	female	married	expersq
Media	5.896103	12.5627376	17.0171103	5.104563	0.4790875	0.6083650	473.435361
Mediana	4.650000	12.0000000	13.5000000	2.000000	0.0000000	1.0000000	182.500000
Desv.Est.	3.693086	2.7690224	13.5721596	7.224462	0.5000380	0.4885804	616.044772
Mínimo	0.530000	0.0000000	1.0000000	0.000000	0.0000000	0.0000000	1.000000
Máximo	24.980000	18.0000000	51.0000000	44.000000	1.0000000	1.0000000	2601.000000
Asimetría	2.007325	-0.6195741	0.7068652	2.110273	0.0837235	-0.4440136	1.492432
Curtosis	7.970083	4.8842450	2.3573176	7.658076	1.0070096	1.1971481	4.296473

##Fórmula y estimación OLS

fml <- as.formula(paste("log(", y_var, ") ~", paste(x_vars, collapse = " + ")))
m_log <- lm(fml, data = df)

##Resumen y R^2

summary(m_log)

## 
## Call:
## lm(formula = fml, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.81854 -0.25682 -0.02526  0.24755  1.18148 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.4158424  0.0993372   4.186 3.33e-05 ***
## educ         0.0798322  0.0068273  11.693  < 2e-16 ***
## exper        0.0300995  0.0051931   5.796 1.18e-08 ***
## tenure       0.0160739  0.0028801   5.581 3.86e-08 ***
## female      -0.2911303  0.0362832  -8.024 6.88e-15 ***
## married      0.0564494  0.0409259   1.379    0.168    
## expersq     -0.0006012  0.0001099  -5.472 6.95e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4014 on 519 degrees of freedom
## Multiple R-squared:  0.4361, Adjusted R-squared:  0.4296 
## F-statistic: 66.91 on 6 and 519 DF,  p-value: < 2.2e-16

#Tabla del summary

modelsummary(m_log, statistic = "({std.error})", title = "Resultados del modelo OLS")

Resultados del modelo OLS
	(1)
(Intercept)	0.416
	(0.099)
educ	0.080
	(0.007)
exper	0.030
	(0.005)
tenure	0.016
	(0.003)
female	-0.291
	(0.036)
married	0.056
	(0.041)
expersq	-0.001
	(0.000)
Num.Obs.	526
R2	0.436
R2 Adj.	0.430
AIC	2249.2
BIC	2283.3
Log.Lik.	-262.755
F	66.906
RMSE	0.40

#Linealidad (gràficos y pruebas)

car::crPlots(m_log)

##Residuos parciales con ggplot

hacer_grafico_pr <- function(modelo, x){
  aug <- broom::augment(modelo)
  beta_x <- coef(modelo)[x]
  if(is.na(beta_x)) stop("La variable no está en el modelo.")
  aug |>
    mutate(partial = .resid + !!sym(x)*beta_x) |>
    ggplot(aes_string(x = x, y = "partial")) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "loess", se = FALSE) +
    labs(title = paste("Gráfico de residuos parciales:", x),
         y = paste("e +", round(beta_x,4), "*", x))
}
print(hacer_grafico_pr(m_log, x_vars[1]))

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_smooth()` using formula = 'y ~ x'

RESET test

reset_lm <- lmtest::resettest(m_log, power = 2:3, type = "fitted")
reset_lm

## 
##  RESET test
## 
## data:  m_log
## RESET = 7.0291, df1 = 2, df2 = 517, p-value = 0.0009729

#Homoedasticidad

bp <- lmtest::bptest(m_log)
bp

## 
##  studentized Breusch-Pagan test
## 
## data:  m_log
## BP = 17.023, df = 6, p-value = 0.009197

X <- model.matrix(m_log)
white <- lmtest::bptest(m_log, ~ fitted(m_log) + I(fitted(m_log)^2))
white

## 
##  studentized Breusch-Pagan test
## 
## data:  m_log
## BP = 5.1463, df = 2, p-value = 0.0763

Gráfico residuos vs ajustados

broom::augment(m_log) |>
  ggplot(aes(.fitted, .resid)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "loess", se = FALSE) +
  labs(title = "Residuos vs Ajustados", x = "Ajustados", y = "Residuos")

## `geom_smooth()` using formula = 'y ~ x'

#Normalidad en los residuos

if(nrow(df) <= 5000){
  print(shapiro.test(residuals(m_log)))
}

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(m_log)
## W = 0.98685, p-value = 0.0001111

jb <- tseries::jarque.bera.test(residuals(m_log))

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

jb

## 
##  Jarque Bera Test
## 
## data:  residuals(m_log)
## X-squared = 26.035, df = 2, p-value = 2.221e-06

ggpubr::ggqqplot(residuals(m_log)) + ggtitle("QQ-plot de residuos")

#Multicolinealidad

vifs <- car::vif(m_log)
vifs

##      educ     exper    tenure    female   married   expersq 
##  1.164312 16.183797  1.410458  1.072362  1.302549 14.924497

kappa_X <- kappa(model.matrix(m_log))
kappa_X

## [1] 4635.054

#Soluciones econométricas si los supuestos fallan # SE robustos a heterocedasticidad

coeftest(m_log, vcov = sandwich::vcovHC(m_log, type = "HC1"))

## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)  0.41584244  0.10658752  3.9014 0.0001082 ***
## educ         0.07983221  0.00765428 10.4298 < 2.2e-16 ***
## exper        0.03009952  0.00488331  6.1638 1.428e-09 ***
## tenure       0.01607388  0.00348398  4.6137 4.991e-06 ***
## female      -0.29113027  0.03686784 -7.8966 1.720e-14 ***
## married      0.05644940  0.04170194  1.3536 0.1764406    
## expersq     -0.00060115  0.00010185 -5.9023 6.478e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

WLS

w <- 1 / (fitted(m_log)^2 + 1e-8)
m_wls <- lm(fml, data = df, weights = w)
summary(m_wls)

## 
## Call:
## lm(formula = fml, data = df, weights = w)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55214 -0.15953 -0.01655  0.15536  0.78938 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.164e-01  7.966e-02   8.994  < 2e-16 ***
## educ         5.279e-02  5.409e-03   9.760  < 2e-16 ***
## exper        3.039e-02  4.847e-03   6.271 7.57e-10 ***
## tenure       1.462e-02  3.142e-03   4.654 4.13e-06 ***
## female      -2.479e-01  3.551e-02  -6.982 8.97e-12 ***
## married      7.861e-02  3.888e-02   2.022   0.0437 *  
## expersq     -6.044e-04  9.913e-05  -6.098 2.10e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2571 on 519 degrees of freedom
## Multiple R-squared:  0.3953, Adjusted R-squared:  0.3884 
## F-statistic: 56.56 on 6 and 519 DF,  p-value: < 2.2e-16

bptest(m_log)

## 
##  studentized Breusch-Pagan test
## 
## data:  m_log
## BP = 17.023, df = 6, p-value = 0.009197

bptest(m_wls)

## 
##  studentized Breusch-Pagan test
## 
## data:  m_wls
## BP = 23.328, df = 6, p-value = 0.0006939

plot(fitted(m_log), residuals(m_log),
     main = "Residuos vs Ajustados (¨LOG)",
     xlab = "Valores ajustados", ylab = "Residuos")
abline(h = 0, col = "red")

plot(fitted(m_wls), residuals(m_wls),
     main = "Residuos vs Ajustados (WLS)",
     xlab = "Valores ajustados", ylab = "Residuos ponderados")
abline(h = 0, col = "red")

#Informe ordenado (tabla de coeficientes)

tab_log <- broom::tidy(m_log, conf.int = TRUE)
tab_rob <- broom::tidy(coeftest(m_log, vcov = sandwich::vcovHC(m_log, type="HC1")))
list(
  resumen_log = broom::glance(m_log),
  coef_ci_log = tab_log,
  coef_se_rob = tab_rob,
  prueba_bp = bp,
  reset_test = reset_lm,
  vifs = vifs,
  kappa = kappa_X
)

## $resumen_log
## # A tibble: 1 × 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.436         0.430 0.401      66.9 1.76e-61     6  -263.  542.  576.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
## 
## $coef_ci_log
## # A tibble: 7 × 7
##   term         estimate std.error statistic  p.value  conf.low conf.high
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>     <dbl>     <dbl>
## 1 (Intercept)  0.416     0.0993        4.19 3.33e- 5  0.221     0.611   
## 2 educ         0.0798    0.00683      11.7  3.37e-28  0.0664    0.0932  
## 3 exper        0.0301    0.00519       5.80 1.18e- 8  0.0199    0.0403  
## 4 tenure       0.0161    0.00288       5.58 3.86e- 8  0.0104    0.0217  
## 5 female      -0.291     0.0363       -8.02 6.88e-15 -0.362    -0.220   
## 6 married      0.0564    0.0409        1.38 1.68e- 1 -0.0240    0.137   
## 7 expersq     -0.000601  0.000110     -5.47 6.95e- 8 -0.000817 -0.000385
## 
## $coef_se_rob
## # A tibble: 7 × 5
##   term         estimate std.error statistic  p.value
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  0.416     0.107         3.90 1.08e- 4
## 2 educ         0.0798    0.00765      10.4  2.99e-23
## 3 exper        0.0301    0.00488       6.16 1.43e- 9
## 4 tenure       0.0161    0.00348       4.61 4.99e- 6
## 5 female      -0.291     0.0369       -7.90 1.72e-14
## 6 married      0.0564    0.0417        1.35 1.76e- 1
## 7 expersq     -0.000601  0.000102     -5.90 6.48e- 9
## 
## $prueba_bp
## 
##  studentized Breusch-Pagan test
## 
## data:  m_log
## BP = 17.023, df = 6, p-value = 0.009197
## 
## 
## $reset_test
## 
##  RESET test
## 
## data:  m_log
## RESET = 7.0291, df1 = 2, df2 = 517, p-value = 0.0009729
## 
## 
## $vifs
##      educ     exper    tenure    female   married   expersq 
##  1.164312 16.183797  1.410458  1.072362  1.302549 14.924497 
## 
## $kappa
## [1] 4635.054

Taller Econometria 1

Danna Isabella Casquete y Claudia Patricia Ramirez

2025-11-01

Instalación de Paquetes y Carga de Librerías

RESET test

Gráfico residuos vs ajustados

WLS