Encuesta Nacional de Salud

Este código reproduce el análisis de la tabla 1.1 de Angrist y Pischke (2014), que compara las características de esposos y esposas con y sin seguro médico.

library(tidyverse)
## -- Attaching packages ------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ---------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(haven)

Carga la información

# Restore the objects stored in the .rda file into the current environment,
# then preview the first six rows of NHIS2009.
load(file = "NHIS2009.rda")
head(NHIS2009, n = 6L)
## # A tibble: 6 x 40
##    year   inc1   inc2   inc3   inc4   inc5   inc6   inc7   inc8 serial
##   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <int>
## 1  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      1
## 2  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      3
## 3  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      3
## 4  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      3
## 5  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      3
## 6  2009 19283. 41679. 61103. 85986. 1.68e5 25398. 1.10e5 70835.      4
## # ... with 30 more variables: hhweight <dbl>, pernum <dbl>,
## #   perweight <dbl>, sampweight <dbl>, age <dbl>, marstat <dbl+lbl>,
## #   sex <dbl+lbl>, famsize <dbl+lbl>, relate <dbl+lbl>, racenew <dbl+lbl>,
## #   educ <dbl+lbl>, educrec1 <dbl+lbl>, empstat <dbl+lbl>,
## #   incfam07on <dbl+lbl>, health <dbl+lbl>, uninsured <dbl+lbl>,
## #   age2 <dbl>, fml <lgl>, nwhite <dbl>, hi <dbl>, yedu <dbl>, empl <dbl>,
## #   hlth <dbl>, inc <dbl>, incmp <dbl>, brooks <dbl>, marradult <lgl>,
## #   marradult_empl <dbl>, adltempl <dbl>, hi_hsb1 <dbl>

Quitar los valores faltantes

# Drop observations with missing insurance information and keep only `serial`
# groups (households, per the surrounding text) with exactly one row flagged
# `fml`.
NHIS2009 <- NHIS2009 %>%
  # Keep rows where `marradult` is TRUE and the person weight is non-zero.
  filter(marradult, perweight != 0) %>%
  group_by(serial) %>%
  # Replace hi_hsb1 with its within-group mean, ignoring missing values.
  mutate(hi_hsb1 = mean(hi_hsb1, na.rm = TRUE)) %>%
  # mean(..., na.rm = TRUE) is NaN when every value in the group is missing;
  # is.na() is TRUE for NaN, so those groups are dropped here as well.
  filter(!is.na(hi_hsb1), !is.na(hi)) %>%
  # Count the rows flagged `fml` in each group and keep groups with exactly one.
  mutate(female = sum(fml)) %>%
  filter(female == 1) %>%
  # Do not leave the result grouped by `serial` for later steps.
  ungroup() %>%
  select(-female)

La muestra incluye únicamente adultos casados con edades entre 26 y 59 años, y quita los hogares con una sola persona.

# Restrict to ages 26-59 (inclusive) among rows flagged `marradult` whose
# `adltempl` is at least 1, then keep only `serial` groups that still contain
# more than one row.
NHIS2009 <- NHIS2009 %>%
  filter(age >= 26, age <= 59, marradult, adltempl >= 1) %>%
  group_by(serial) %>%
  filter(n() > 1L) %>%
  ungroup()

Se presenta la tabla de esposas y esposos por estatus de aseguramiento

# Weighted counts and within-group shares of each `hi` value, computed
# separately for each value of `fml`.
NHIS2009 %>%
  group_by(fml) %>%
  # Normalize person weights so that, within each fml group, they sum to the
  # number of observations in that group.
  mutate(perweight = perweight / sum(perweight) * n()) %>%
  group_by(fml, hi) %>%
  # Weighted count for each fml x hi cell.
  summarise(n_wt = sum(perweight)) %>%
  group_by(fml) %>%
  # Share of each hi value within its fml group.
  mutate(prop = n_wt / sum(n_wt))
## # A tibble: 4 x 4
## # Groups:   fml [2]
##   fml      hi  n_wt  prop
##   <lgl> <dbl> <dbl> <dbl>
## 1 FALSE     0 1281. 0.136
## 2 FALSE     1 8114. 0.864
## 3 TRUE      0 1131. 0.120
## 4 TRUE      1 8264. 0.880

Comparación de los estadísticos muestrales entre hombres y mujeres, con y sin seguro médico

# Variables whose mean and standard deviation are compared across `hi` groups.
varlist <- c("hlth", "nwhite", "age", "yedu", "famsize", "empl", "inc")

# For each fml x variable combination, compute the mean and sd for hi == 0 and
# hi == 1, and the difference in means (hi == 1 minus hi == 0).
NHIS2009_diff <- NHIS2009 %>%
  # Strip every attribute from every column using base R. This avoids a
  # warning about differing attributes when the columns are stacked by
  # gather() below.
  map_dfc(function(col) {
    attributes(col) <- NULL
    col
  }) %>%
  select(fml, hi, one_of(varlist)) %>%
  # Long format: one row per observation and variable.
  gather(variable, value, -fml, -hi) %>%
  group_by(fml, hi, variable) %>%
  summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE)) %>%
  # Drop the residual grouping so the reshaping steps below do not act on a
  # grouping column (`hi` is consumed by unite()).
  ungroup() %>%
  # Long again over the two statistics, then widen into mean_0, mean_1, sd_0
  # and sd_1 columns (statistic name joined with the hi value).
  gather(stat, value, -fml, -hi, -variable) %>%
  unite(stat_hi, stat, hi) %>%
  spread(stat_hi, value) %>%
  mutate(diff = mean_1 - mean_0)
knitr::kable(NHIS2009_diff, digits = 3)
fml variable mean_0 mean_1 sd_0 sd_1 diff
FALSE age 41.270 44.163 8.402 8.609 2.893
FALSE empl 0.852 0.922 0.355 0.268 0.070
FALSE famsize 4.057 3.551 1.544 1.318 -0.506
FALSE hlth 3.699 3.977 1.010 0.934 0.278
FALSE inc 43636.023 104002.438 35689.909 54815.081 60366.415
FALSE nwhite 0.188 0.200 0.391 0.400 0.011
FALSE yedu 11.213 14.132 3.472 2.681 2.919
TRUE age 39.520 42.151 8.261 8.655 2.631
TRUE empl 0.541 0.758 0.498 0.429 0.216
TRUE famsize 4.073 3.553 1.541 1.321 -0.520
TRUE hlth 3.610 3.992 1.021 0.928 0.382
TRUE inc 43641.387 103363.629 35158.894 55058.644 59722.242
TRUE nwhite 0.183 0.202 0.387 0.401 0.018
TRUE yedu 11.359 14.273 3.500 2.600 2.913