Este código reproduce el análisis de la tabla 1.1 de Angrist y Pischke (2014), que compara
library(tidyverse)
## -- Attaching packages ------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ---------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(haven)
Carga la información
# Restore the objects stored in NHIS2009.rda into the current environment.
# Presumably this is where the NHIS2009 data frame used below comes from.
load(file = "NHIS2009.rda")
# Preview the first six rows as a quick check of the loaded data.
head(NHIS2009, n = 6L)
## # A tibble: 6 x 40
## year inc1 inc2 inc3 inc4 inc5 inc6 inc7 inc8 serial
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 1
## 2 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 3
## 3 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 3
## 4 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 3
## 5 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 3
## 6 2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835. 4
## # ... with 30 more variables: hhweight <dbl>, pernum <dbl>,
## # perweight <dbl>, sampweight <dbl>, age <dbl>, marstat <dbl+lbl>,
## # sex <dbl+lbl>, famsize <dbl+lbl>, relate <dbl+lbl>, racenew <dbl+lbl>,
## # educ <dbl+lbl>, educrec1 <dbl+lbl>, empstat <dbl+lbl>,
## # incfam07on <dbl+lbl>, health <dbl+lbl>, uninsured <dbl+lbl>,
## # age2 <dbl>, fml <lgl>, nwhite <dbl>, hi <dbl>, yedu <dbl>, empl <dbl>,
## # hlth <dbl>, inc <dbl>, incmp <dbl>, brooks <dbl>, marradult <lgl>,
## # marradult_empl <dbl>, adltempl <dbl>, hi_hsb1 <dbl>
Quitar los valores faltantes
# Keep married adults with a non-zero person weight, then clean the data
# household by household (rows sharing a `serial` are treated as one household).
NHIS2009 <- NHIS2009 %>%
  filter(marradult, perweight != 0) %>%
  group_by(serial) %>%
  # Replace hi_hsb1 with its household mean so every member carries the same
  # value. Households where it is missing for everyone yield NaN and are
  # dropped by the next filter, together with rows where `hi` is missing.
  mutate(hi_hsb1 = mean(hi_hsb1, na.rm = TRUE)) %>%
  filter(!is.na(hi_hsb1), !is.na(hi)) %>%
  # Keep only households with exactly one woman among the remaining rows.
  mutate(female = sum(fml)) %>%
  filter(female == 1) %>%
  select(-female) %>%
  # Drop the household grouping so the result is a plain, ungrouped tibble.
  ungroup()
La muestra incluye únicamente adultos casados con edades entre 26 y 59 años, y quita los hogares con una sola persona
# Restrict the sample to married adults aged 26 to 59 (inclusive) with
# adltempl of at least 1, then discard households left with a single person.
NHIS2009 <- NHIS2009 %>%
  filter(age >= 26, age <= 59, marradult, adltempl >= 1) %>%
  group_by(serial) %>%
  # n() is the number of rows remaining in the household.
  filter(n() > 1L) %>%
  ungroup()
Se presenta la tabla de esposas y esposos por estatus de aseguramiento
# Weighted counts and within-sex shares of spouses by insurance status (hi).
NHIS2009 %>%
  group_by(fml) %>%
  # Normalise person weights within each sex so that they sum to the number
  # of observations in that group.
  mutate(perweight = perweight / sum(perweight) * n()) %>%
  ungroup() %>%
  # Weighted number of people in each sex-by-insurance cell.
  group_by(fml, hi) %>%
  summarise(n_wt = sum(perweight)) %>%
  ungroup() %>%
  # Share of each insurance status within sex.
  group_by(fml) %>%
  mutate(prop = n_wt / sum(n_wt))
## # A tibble: 4 x 4
## # Groups: fml [2]
## fml hi n_wt prop
## <lgl> <dbl> <dbl> <dbl>
## 1 FALSE 0 1281. 0.136
## 2 FALSE 1 8114. 0.864
## 3 TRUE 0 1131. 0.120
## 4 TRUE 1 8264. 0.880
Comparación de los estadísticos muestrales entre hombres y mujeres, con y sin seguro médico
# Variables whose sample statistics are compared by insurance status.
varlist <- c("hlth", "nwhite", "age", "yedu", "famsize", "empl", "inc")

# Mean and standard deviation of every variable in `varlist` by sex (fml) and
# insurance status (hi), plus the insured-minus-uninsured difference in means.
NHIS2009_diff <- NHIS2009 %>%
  select(fml, hi, one_of(varlist)) %>%
  # Strip all column attributes (e.g. value labels) before stacking the
  # columns; this avoids the warning about differing attributes in gather().
  # Base `attributes<-` is used because rlang::set_attrs() is deprecated.
  map_dfc(function(col) {
    attributes(col) <- NULL
    col
  }) %>%
  gather(variable, value, -fml, -hi) %>%
  group_by(fml, hi, variable) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  # Ungroup so the reshaping below does not operate on (or unite) a
  # grouping column.
  ungroup() %>%
  # Long format with one row per statistic, then one column per
  # statistic-by-insurance pair (mean_0, mean_1, sd_0, sd_1).
  gather(stat, value, -fml, -hi, -variable) %>%
  unite(stat_hi, stat, hi) %>%
  spread(stat_hi, value) %>%
  mutate(diff = mean_1 - mean_0)

knitr::kable(NHIS2009_diff, digits = 3)
| fml | variable | mean_0 | mean_1 | sd_0 | sd_1 | diff |
|---|---|---|---|---|---|---|
| FALSE | age | 41.270 | 44.163 | 8.402 | 8.609 | 2.893 |
| FALSE | empl | 0.852 | 0.922 | 0.355 | 0.268 | 0.070 |
| FALSE | famsize | 4.057 | 3.551 | 1.544 | 1.318 | -0.506 |
| FALSE | hlth | 3.699 | 3.977 | 1.010 | 0.934 | 0.278 |
| FALSE | inc | 43636.023 | 104002.438 | 35689.909 | 54815.081 | 60366.415 |
| FALSE | nwhite | 0.188 | 0.200 | 0.391 | 0.400 | 0.011 |
| FALSE | yedu | 11.213 | 14.132 | 3.472 | 2.681 | 2.919 |
| TRUE | age | 39.520 | 42.151 | 8.261 | 8.655 | 2.631 |
| TRUE | empl | 0.541 | 0.758 | 0.498 | 0.429 | 0.216 |
| TRUE | famsize | 4.073 | 3.553 | 1.541 | 1.321 | -0.520 |
| TRUE | hlth | 3.610 | 3.992 | 1.021 | 0.928 | 0.382 |
| TRUE | inc | 43641.387 | 103363.629 | 35158.894 | 55058.644 | 59722.242 |
| TRUE | nwhite | 0.183 | 0.202 | 0.387 | 0.401 | 0.018 |
| TRUE | yedu | 11.359 | 14.273 | 3.500 | 2.600 | 2.913 |