homework

library(ipumsr)
library(forcats)
library(gtsummary)
library(car)

## Loading required package: carData

library(sur)

## 
## Attaching package: 'sur'

## The following objects are masked from 'package:carData':
## 
##     Anscombe, States

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.0.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(dplyr)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(grid)
library(Matrix)
library(dplyr, warn.conflicts = FALSE)

#Load the IPUMS NHIS extract: read the DDI codebook file, then read the
#microdata using that codebook. `dat` is the data frame used by everything below.
ddi <- read_ipums_ddi("nhis_00005.xml")
dat <- read_ipums_micro(ddi)

## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.

#Preview only the columns for which is.labelled() is TRUE.
#select(where(...)) replaces the superseded select_if() and selects the same columns.
dat %>%
  select(where(is.labelled))

## # A tibble: 72,831 × 11
##       STRATA    PSU  REGION ASTATFLG CSTATFLG  HISPETH      EDUC  HEALTH USUALPL
##    <dbl+lbl> <dbl+> <int+l> <int+lb> <int+lb> <int+lb> <int+lbl> <int+l> <int+l>
##  1      7103     19 3 [Sou… 1 [Samp… 0 [NIU]  10 [Not… 113 [Gra… 3 [Goo… 2 [Yes…
##  2      7137     38 2 [Nor… 3 [Not … 0 [NIU]  10 [Not… 201 [Hig… 3 [Goo… 0 [NIU]
##  3      7137     38 2 [Nor… 2 [Samp… 0 [NIU]  10 [Not… 301 [Som… 3 [Goo… 0 [NIU]
##  4      7137     38 2 [Nor… 0 [NIU]  1 [Samp… 10 [Not… 114 [Gra… 3 [Goo… 2 [Yes…
##  5      7106     22 3 [Sou… 1 [Samp… 0 [NIU]  10 [Not… 503 [Doc… 3 [Goo… 2 [Yes…
##  6      7106     22 3 [Sou… 3 [Not … 0 [NIU]  10 [Not… 400 [Bac… 2 [Ver… 0 [NIU]
##  7      7106     22 3 [Sou… 0 [NIU]  1 [Samp… 10 [Not… 106 [Gra… 1 [Exc… 2 [Yes…
##  8      7106     22 3 [Sou… 0 [NIU]  3 [Not … 10 [Not… 102 [Nev… 1 [Exc… 0 [NIU]
##  9      7117     25 2 [Nor… 1 [Samp… 0 [NIU]  10 [Not… 302 [AA … 2 [Ver… 2 [Yes…
## 10      7117     25 2 [Nor… 3 [Not … 0 [NIU]  10 [Not… 301 [Som… 1 [Exc… 0 [NIU]
## # … with 72,821 more rows, and 2 more variables: YBARMEDS <int+lbl>,
## #   HINOTCOVE <int+lbl>

#Recode the labelled IPUMS variables into analysis variables.
#Columns ending in "1" hold the numeric recode (every code not listed becomes NA);
#the matching factor column carries the readable labels used in the tables.

#Poor or fair self health rating: 1 = HEALTH codes 4-5, 0 = HEALTH codes 1-3
dat$badhealth1 <- Recode(dat$HEALTH, recodes = "4:5=1; 1:3=0; else=NA")

#Poor or fair self health rating final
#Fix: badhealth1 == 1 flags the poor/fair group, so it is labelled "Poor health"
#(the labels were previously swapped).
dat$badhealth <- as.factor(
  ifelse(dat$badhealth1 == 1, "Poor health", "Good health")
)

#Health insurance coverage
dat$health_insurance1 <- Recode(dat$HINOTCOVE, recodes = "1=1; 2=2; else=NA")

#Health insurance coverage final
dat$health_insurance <- as.factor(
  ifelse(dat$health_insurance1 == 1, "Has coverage", "No coverage")
)

#Can afford medication
dat$afford_medications1 <- Recode(dat$YBARMEDS, recodes = "1=1; 2=2; else=NA")

#Can afford medication final
dat$afford_medications <- as.factor(
  ifelse(dat$afford_medications1 == 1, "Can afford meds", "Cannot afford meds")
)

#Usual place for medical care
#NOTE(review): USUALPL code 3 falls into else=NA here; if the codebook defines it as
#"more than one place", consider treating it as having a usual place.
dat$usual_place_care1 <- Recode(dat$USUALPL, recodes = "1=1; 2=2; else=NA")

#Usual place for medical care final
#Fix: the value labels in the data show USUALPL 2 = "Yes", so code 2 (not code 1)
#is the "Has usual place" group (the labels were previously swapped).
dat$usual_place_for_care <- as.factor(
  ifelse(dat$usual_place_care1 == 2, "Has usual place", "No usual place")
)

#Hispanic ethnicity: 1 = HISPETH codes 20-70, 0 = HISPETH code 10
dat$ethnicity1 <- Recode(dat$HISPETH, recodes = "20:70=1; 10=0; else=NA")

#Hispanic ethnicity final
dat$ethnicity <- as.factor(
  ifelse(dat$ethnicity1 == 1, "Hispanic", "Not Hispanic")
)

#Region of residence
dat$Region_of_residence <- Recode(
  dat$REGION,
  recodes = "01=1; 02=2; 03=3; 04=4; else=NA"
)

#Educational attainment (ordered 1 = lowest EDUC codes ... 6 = highest)
#Fix: ranges must be written low:high; the descending ranges (e.g. "301:300")
#matched no values, so levels 1-4 were all NA. as.factor is now passed by name
#as a plain logical instead of the positional as.factor(T).
#NOTE(review): EDUC code 503 appears in the data but is not covered by 500:502,
#so it becomes NA; confirm whether it belongs in level 6.
dat$Education_level <- Recode(
  dat$EDUC,
  recodes = paste(
    "500:502=6", "302:400=5", "300:301=4",
    "200:202=3", "112:116=2", "101:111=1", "else=NA",
    sep = "; "
  ),
  as.factor = TRUE
)

The binary outcome variable that I chose is HINOTCOVE. This is a yes/no variable that indicates whether a person has any form of medical insurance coverage. The responses 0, 7, 8 and 9 were omitted from the recoded variable (set to missing). I first used the Recode function to eliminate these unneeded responses, and then used ifelse to label 1 as ‘Has coverage’ and 2 as ‘No coverage.’
Research question: In what ways does a person's health insurance status impact their ability to access needed diabetes-related healthcare, such as affording medications and seeing a provider regularly?
The predictor variables I used are USUALPL (whether a person has a usual place for medical care), HISPETH (whether a person identifies as being of Hispanic/Spanish/Latino origin or ancestry) and YBARMEDS (whether a person had difficulty affording medications in the past 12 months). USUALPL shows whether a person has one or more usual places they go when they need medical care or advice. The second predictor variable, HISPETH, shows whether a person considers themselves to be of Hispanic descent; it was recoded into two groups, Hispanic and Not Hispanic.

#Frequency of each raw HEALTH code (the original variable, before recoding)
table(dat$HEALTH)

## 
##     1     2     3     4     5     7     9 
## 25109 22683 17432  5822  1711    50    24

#Counts for the recoded medication-affordability factor (table() omits NA by default)
table(dat$afford_medications)

## 
##    Can afford meds Cannot afford meds 
##              31667               1815

#Counts for the recoded health-insurance factor, the outcome variable (table() omits NA by default)
table(dat$health_insurance)

## 
## Has coverage  No coverage 
##        65706         6378

#NOTE(review): this is the same health_insurance table as the statement before it;
#possibly a different variable was intended here. Confirm or remove.
table(dat$health_insurance)

## 
## Has coverage  No coverage 
##        65706         6378

#Cross-tabulation: insurance status (rows) by medication affordability (columns)
table(dat$health_insurance, dat$afford_medications)

##               
##                Can afford meds Cannot afford meds
##   Has coverage           29297               1360
##   No coverage             2261                450

#Cross-tabulation: medication affordability (rows) by Hispanic ethnicity (columns)
table(dat$afford_medications, dat$ethnicity)

##                     
##                      Hispanic Not Hispanic
##   Can afford meds        4734        26933
##   Cannot afford meds      293         1522

#Column percentages: within each affordability group (columns), the share with
#and without coverage (rows). margin = 2 makes each column sum to 100.
coverage_by_affordability <- table(dat$health_insurance, dat$afford_medications)
prop.table(coverage_by_affordability, margin = 2) * 100

##               
##                Can afford meds Cannot afford meds
##   Has coverage       92.835414          75.138122
##   No coverage         7.164586          24.861878

I used the prop.table function to compute column percentages: among people who could and could not afford their medications in the past 12 months, the four values show the share with and without health insurance coverage.

#Chi-squared test of independence between insurance status and medication affordability
#(for this 2x2 table the printed result uses Yates' continuity correction)
chisq.test(table(dat$health_insurance, dat$afford_medications))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(dat$health_insurance, dat$afford_medications)
## X-squared = 715.88, df = 1, p-value < 2.2e-16

The p-value from the chi-squared test (p < 2.2e-16) shows a statistically significant association between a person's ability to afford medication in the past 12 months and their insurance status.

#Numeric summary of the 1/2 insurance recode; the NA's entry counts the omitted responses
summary(dat[["health_insurance1"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   1.000   1.000   1.089   1.000   2.000     747

#Numeric summary of the 1/2 affordability recode; the NA's entry counts the omitted responses
summary(dat[["afford_medications1"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    1.00    1.00    1.05    1.00    2.00   39349

homework

Bryan Solomon

2/6/2022