library(ipumsr)
library(forcats)
library(gtsummary)
library(car)
## Loading required package: carData
library(sur)
##
## Attaching package: 'sur'
## The following objects are masked from 'package:carData':
##
## Anscombe, States
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(dplyr)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(grid)
library(Matrix)
library(dplyr, warn.conflicts = FALSE)
# Parse the DDI codebook for the NHIS extract, then load the microdata that
# the codebook describes.
ddi <- read_ipums_ddi(ddi_file = "nhis_00005.xml")
dat <- read_ipums_micro(ddi = ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
# Preview only the value-labelled columns of the extract.
# select_if() is a superseded scoped verb; select(where(...)) is the current
# dplyr form and picks the same columns.
dat %>%
  select(where(is.labelled))
## # A tibble: 72,831 × 11
## STRATA PSU REGION ASTATFLG CSTATFLG HISPETH EDUC HEALTH USUALPL
## <dbl+lbl> <dbl+> <int+l> <int+lb> <int+lb> <int+lb> <int+lbl> <int+l> <int+l>
## 1 7103 19 3 [Sou… 1 [Samp… 0 [NIU] 10 [Not… 113 [Gra… 3 [Goo… 2 [Yes…
## 2 7137 38 2 [Nor… 3 [Not … 0 [NIU] 10 [Not… 201 [Hig… 3 [Goo… 0 [NIU]
## 3 7137 38 2 [Nor… 2 [Samp… 0 [NIU] 10 [Not… 301 [Som… 3 [Goo… 0 [NIU]
## 4 7137 38 2 [Nor… 0 [NIU] 1 [Samp… 10 [Not… 114 [Gra… 3 [Goo… 2 [Yes…
## 5 7106 22 3 [Sou… 1 [Samp… 0 [NIU] 10 [Not… 503 [Doc… 3 [Goo… 2 [Yes…
## 6 7106 22 3 [Sou… 3 [Not … 0 [NIU] 10 [Not… 400 [Bac… 2 [Ver… 0 [NIU]
## 7 7106 22 3 [Sou… 0 [NIU] 1 [Samp… 10 [Not… 106 [Gra… 1 [Exc… 2 [Yes…
## 8 7106 22 3 [Sou… 0 [NIU] 3 [Not … 10 [Not… 102 [Nev… 1 [Exc… 0 [NIU]
## 9 7117 25 2 [Nor… 1 [Samp… 0 [NIU] 10 [Not… 302 [AA … 2 [Ver… 2 [Yes…
## 10 7117 25 2 [Nor… 3 [Not … 0 [NIU] 10 [Not… 301 [Som… 1 [Exc… 0 [NIU]
## # … with 72,821 more rows, and 2 more variables: YBARMEDS <int+lbl>,
## # HINOTCOVE <int+lbl>
# Poor or fair self-rated health indicator.
# HEALTH codes 4-5 become 1, codes 1-3 (labelled Excellent / Very good / Good
# in the data) become 0, and every other code (7 and 9 occur) becomes NA.
dat$badhealth1 <- Recode(dat$HEALTH, recodes = "4:5=1; 1:3=0; else=NA")
# Labelled factor version of the indicator.
# badhealth1 == 1 marks the poor/fair group, so it must map to "Poor health";
# the two labels were previously swapped.
dat$badhealth <- as.factor(ifelse(dat$badhealth1 == 1, "Poor health", "Good health"))
# Health insurance coverage.
# health_insurance1 keeps HINOTCOVE codes 1 and 2 and sets every other code to
# NA; health_insurance is the labelled factor built from it.
dat <- dat %>%
  mutate(
    health_insurance1 = Recode(HINOTCOVE, recodes = "1=1; 2=2; else=NA"),
    health_insurance = as.factor(
      ifelse(health_insurance1 == 1, "Has coverage", "No coverage")
    )
  )
# Ability to afford medication.
# afford_medications1 keeps YBARMEDS codes 1 and 2 and sets every other code
# to NA; afford_medications is the labelled factor built from it.
dat <- dat %>%
  mutate(
    afford_medications1 = Recode(YBARMEDS, recodes = "1=1; 2=2; else=NA"),
    afford_medications = as.factor(
      ifelse(afford_medications1 == 1, "Can afford meds", "Cannot afford meds")
    )
  )
# Usual place for medical care.
# Keep USUALPL codes 1 and 2; every other code becomes NA.
# NOTE(review): the codebook may define a further "more than one place" code
# that should also count as having a usual place -- confirm against IPUMS.
dat$usual_place_care1 <- Recode(dat$USUALPL, recodes = "1=1; 2=2; else=NA")
# Labelled factor version.
# The data labels code 2 as "Yes", so code 2 is the group that has a usual
# place and code 1 is the group that does not; the labels were previously
# swapped.
dat$usual_place_for_care <- as.factor(ifelse(dat$usual_place_care1 == 2, "Has usual place", "No usual place"))
# Hispanic ethnicity.
# ethnicity1 is 1 for HISPETH codes 20-70, 0 for code 10, and NA otherwise;
# ethnicity is the labelled factor built from it.
dat <- dat %>%
  mutate(
    ethnicity1 = Recode(HISPETH, recodes = "20:70=1; 10=0; else=NA"),
    ethnicity = as.factor(ifelse(ethnicity1 == 1, "Hispanic", "Not Hispanic"))
  )
# Region of residence: keep REGION codes 1-4 as they are and set every other
# code to NA.
dat[["Region_of_residence"]] <- Recode(
  dat[["REGION"]],
  recodes = "01=1; 02=2; 03=3; 04=4; else=NA"
)
# Educational attainment, collapsed into six ordered groups and returned as a
# factor.
# Recode() ranges must be written low:high. Four of them were previously
# written high:low (e.g. 301:300), which matches no value and silently sent
# those codes to NA through the else clause.
# NOTE(review): code 503 occurs in the data (labelled "Doc...") and is not
# covered by 500:502, so it becomes NA -- confirm whether that is intended.
dat$Education_level <- Recode(
  dat$EDUC,
  recodes = "500:502=6; 302:400=5; 300:301=4; 200:202=3; 112:116=2; 101:111=1; else=NA",
  as.factor = TRUE
)
The binary outcome variable that I chose is HINOTCOVE, a yes/no variable that indicates whether a person has any form of medical insurance coverage. Responses coded 0, 7, 8 and 9 were omitted from the recoded variable. To recode it, I first used the Recode function to set those unneeded responses to missing, and then used an ifelse statement to label 1 as ‘Has coverage’ and 2 as ‘No coverage’.
Research question: In what ways does a person's health insurance status impact their ability to access needed diabetes-related healthcare, such as affording medications and seeing a provider regularly?
The predictor variables I used are USUALPL (whether a person has a usual place for medical care), HISPETH (whether a person identifies as being of Hispanic/Spanish/Latino origin or ancestry) and YBARMEDS (whether a person had difficulty affording medications in the past 12 months). USUALPL shows whether a person has one or more usual places they go when they need medical care or advice. HISPETH shows whether a person considers themselves to be of Hispanic descent; it was recoded into two groups, Hispanic and Not Hispanic.
# Frequency of each raw HEALTH code before recoding.
table(dat[["HEALTH"]])
##
## 1 2 3 4 5 7 9
## 25109 22683 17432 5822 1711 50 24
# Counts for the recoded medication-affordability factor (NAs are excluded).
table(dat[["afford_medications"]])
##
## Can afford meds Cannot afford meds
## 31667 1815
# Counts for the recoded insurance-coverage factor (NAs are excluded).
table(dat[["health_insurance"]])
##
## Has coverage No coverage
## 65706 6378
# Counts for the insurance-coverage factor.
# NOTE(review): this tabulates the same variable as the preceding call;
# confirm whether a different variable was intended here.
table(dat[["health_insurance"]])
##
## Has coverage No coverage
## 65706 6378
# Cross-tabulation: insurance coverage (rows) by medication affordability
# (columns).
table(dat[["health_insurance"]], dat[["afford_medications"]])
##
## Can afford meds Cannot afford meds
## Has coverage 29297 1360
## No coverage 2261 450
# Cross-tabulation: medication affordability (rows) by Hispanic ethnicity
# (columns).
table(dat[["afford_medications"]], dat[["ethnicity"]])
##
## Hispanic Not Hispanic
## Can afford meds 4734 26933
## Cannot afford meds 293 1522
# Column percentages: within each affordability group, the share of people
# with and without coverage (margin = 2 makes each column sum to 100).
prop.table(
  table(dat[["health_insurance"]], dat[["afford_medications"]]),
  margin = 2
) * 100
##
## Can afford meds Cannot afford meds
## Has coverage 92.835414 75.138122
## No coverage 7.164586 24.861878
I used the prop.table function to compute column percentages: among people who could afford their medications in the past 12 months and among those who could not, the percentage with and without health insurance coverage (four values in total).
# Pearson chi-squared test of independence between insurance coverage and
# medication affordability on the 2 x 2 table of counts.
# The argument expression is left as written because chisq.test() prints it
# verbatim on the "data:" line of its output.
chisq.test(table(dat$health_insurance, dat$afford_medications))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(dat$health_insurance, dat$afford_medications)
## X-squared = 715.88, df = 1, p-value < 2.2e-16
The p-value from the chi-squared test shows a statistically significant association between a person's ability to afford medication in the past 12 months and their insurance status.
# Numeric summary of the recoded coverage variable, including its NA count.
summary(dat[["health_insurance1"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 1.000 1.089 1.000 2.000 747
# Numeric summary of the recoded affordability variable, including its NA
# count.
summary(dat[["afford_medications1"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 1.00 1.00 1.05 1.00 2.00 39349