library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)

Importing Data

brfss20 = readRDS(url("https://github.com/coreysparks/DEM7283/blob/master/data/brfss20sm.rds?raw=true"))

names(brfss20) = tolower(gsub(pattern = "_",replacement =  "",x =  names(brfss20)))

Recoding the Variables

  1. A binary outcome variable “hlthcare” was defined by dichotomizing “hlthpln1,” a variable from the 2020 BRFSS survey data which measures whether a survey respondent has health care coverage, including health insurance, prepaid plans or government plans.

  2. Does access to healthcare coverage vary on the basis of race/ethnicity or education?

  3. Two categorical predictor variables were recoded and reveled for this analysis: “race_eth” and “educ.”

#Access to healthcare
brfss20$hlthcare = Recode(brfss20$hlthpln1, recodes = "1=1; 2=0; else=NA")

#Race/ethnicity
brfss20$black = Recode(brfss20$racegr3,
                      recodes="2=1; 9=NA; else=0")

brfss20$white = Recode(brfss20$racegr3, 
                      recodes="1=1; 9=NA; else=0")

brfss20$other = Recode(brfss20$racegr3,
                      recodes="3:4=1; 9=NA; else=0")

brfss20$hispanic = Recode(brfss20$racegr3,
                         recodes="5=1; 9=NA; else=0")

brfss20$race_eth = Recode(brfss20$racegr3,
recodes="1='nh white'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)

brfss20$race_eth = relevel(brfss20$race_eth,
                          ref = "nh white")

#Education level
brfss20$educ = Recode(brfss20$educa,
                     recodes="1:2='Less than HS'; 
                     3='Some HS'; 
                     4='HS grad'; 
                     5='Some college'; 
                     6='College grad';9=NA",
                     as.factor=T)

brfss20$educ = fct_relevel(brfss20$educ,'Less than HS','Some HS','HS grad','Some college','College grad' ) 

#Age cut into intervals
brfss20$agec = cut(brfss20$age80, 
                  breaks=c(0,24,39,59,79,99))

#Filter cases
brfss20 = brfss20 %>% 
  filter(is.na(hlthcare)==F,
         is.na(race_eth)==F,
         is.na(educ)==F,
         is.na(agec)==F)
  1. Perform a descriptive analysis of the outcome variable by each of the variables you defined in part b.
  1. Calculate descriptive statistics (mean or percentages) for each variable using no weights or survey design, as well as with full survey design and weights.
#Descriptive statistics with no weights or survey design
library(tableone)
tbl_race_eth = CreateTableOne(vars = c("hlthcare"),
                        strata = "race_eth",
                        data = brfss20)
print(tbl_race_eth, format = "p")
##                       Stratified by race_eth
##                        nh white      hispanic     nh black     nh multirace
##   n                    137323        20465        19000        3442        
##   hlthcare (mean (SD))   0.95 (0.22)  0.75 (0.44)  0.90 (0.31) 0.91 (0.29) 
##                       Stratified by race_eth
##                        nh other    p      test
##   n                    8871                   
##   hlthcare (mean (SD)) 0.91 (0.29) <0.001
tbl_educ = CreateTableOne(vars = c("hlthcare"),
                        strata = "educ",
                        data = brfss20)
print(tbl_educ, format = "p")
##                       Stratified by educ
##                        Less than HS Some HS     HS grad      Some college
##   n                    3719         7085        44885        50241       
##   hlthcare (mean (SD)) 0.61 (0.49)  0.77 (0.42)  0.88 (0.33)  0.92 (0.27)
##                       Stratified by educ
##                        College grad p      test
##   n                    83171                   
##   hlthcare (mean (SD))  0.96 (0.19) <0.001
tbl_agec = CreateTableOne(vars = c("hlthcare"),
                        strata = "agec",
                        data = brfss20)
print(tbl_agec, format = "p")
##                       Stratified by agec
##                        (0,24]       (24,39]      (39,59]      (59,79]     
##   n                    13011        36887        60658        65124       
##   hlthcare (mean (SD))  0.85 (0.36)  0.85 (0.35)  0.90 (0.30)  0.97 (0.17)
##                       Stratified by agec
##                        (79,99]      p      test
##   n                    13421                   
##   hlthcare (mean (SD))  0.98 (0.14) <0.001
#Descriptive statistics with full survey design and weights
library(srvyr)
## 
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
## 
##     filter
options(survey.lonely.psu = "adjust")

des = svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss20 )
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = brfss20)
wght.tbl_race_eth = svyCreateTableOne(vars = c("hlthcare"),
                       strata = "race_eth",
                       test = T,
                       data = des)
print(wght.tbl_race_eth,
      format="p")
##                       Stratified by race_eth
##                        nh white           hispanic           nh black          
##   n                    83518286.98        28373858.31        20352750.99       
##   hlthcare (mean (SD))        0.93 (0.25)        0.71 (0.45)        0.86 (0.35)
##                       Stratified by race_eth
##                        nh multirace      nh other           p      test
##   n                    1755817.04        11515006.94                   
##   hlthcare (mean (SD))       0.89 (0.31)        0.91 (0.28) <0.001
wght.tbl_educ = svyCreateTableOne(vars = c("hlthcare"),
                       strata = "educ",
                       test = T,
                       data = des)
print(wght.tbl_educ,
      format="p")
##                       Stratified by educ
##                        Less than HS      Some HS            HS grad           
##   n                    6534466.48        10412090.51        36902861.45       
##   hlthcare (mean (SD))       0.61 (0.49)        0.72 (0.45)        0.84 (0.36)
##                       Stratified by educ
##                        Some college       College grad       p      test
##   n                    43028676.08        48637625.75                   
##   hlthcare (mean (SD))        0.90 (0.31)        0.95 (0.21) <0.001
wght.tbl_agec = svyCreateTableOne(vars = c("hlthcare"),
                       strata = "agec",
                       test = T,
                       data = des)
print(wght.tbl_agec,
      format="p")
##                       Stratified by agec
##                        (0,24]             (24,39]            (39,59]           
##   n                    16905719.53        38455022.54        47892774.36       
##   hlthcare (mean (SD))        0.82 (0.38)        0.82 (0.38)        0.87 (0.34)
##                       Stratified by agec
##                        (59,79]            (79,99]           p      test
##   n                    35694767.79        6567436.03                   
##   hlthcare (mean (SD))        0.95 (0.21)       0.97 (0.16) <0.001
  1. Calculate percentages, or means, for each of your independent variables for each level of your outcome variable and present this in a table, with appropriate survey-corrected test statistics.
library(gtsummary)

brfss20%>%
  as_survey_design( strata = ststr,
                    weights = mmsawt)%>%
  select(hlthcare, race_eth, educ, agec)%>%
  tbl_svysummary(by = race_eth, 
              label = list(hlthcare = "Access to Healthcare",
                           educ = "Level of Education",
                           agec = "Age Group"))%>%
  add_p()%>%
  add_n()
Characteristic N nh white, N = 83,518,2871 hispanic, N = 28,373,8581 nh black, N = 20,352,7511 nh multirace, N = 1,755,8171 nh other, N = 11,515,0071 p-value2
Access to Healthcare 145,515,720 77,884,098 (93%) 20,137,931 (71%) 17,465,782 (86%) 1,567,783 (89%) 10,508,626 (91%) <0.001
Level of Education 145,515,720 <0.001
Less than HS 951,378 (1.1%) 4,824,536 (17%) 414,443 (2.0%) 26,212 (1.5%) 317,897 (2.8%)
Some HS 3,940,390 (4.7%) 3,968,005 (14%) 1,900,251 (9.3%) 161,998 (9.2%) 441,447 (3.8%)
HS grad 20,961,906 (25%) 7,654,823 (27%) 5,818,888 (29%) 433,207 (25%) 2,034,037 (18%)
Some college 26,125,544 (31%) 6,768,696 (24%) 6,725,038 (33%) 615,421 (35%) 2,793,977 (24%)
College grad 31,539,068 (38%) 5,157,798 (18%) 5,494,130 (27%) 518,979 (30%) 5,927,650 (51%)
Age Group 145,515,720 <0.001
(0,24] 8,125,772 (9.7%) 4,430,181 (16%) 2,219,973 (11%) 335,870 (19%) 1,793,923 (16%)
(24,39] 19,027,766 (23%) 9,185,779 (32%) 5,715,884 (28%) 562,270 (32%) 3,963,324 (34%)
(39,59] 26,574,345 (32%) 9,692,479 (34%) 7,226,113 (36%) 534,971 (30%) 3,864,865 (34%)
(59,79] 24,727,050 (30%) 4,419,323 (16%) 4,595,578 (23%) 274,994 (16%) 1,677,823 (15%)
(79,99] 5,063,354 (6.1%) 646,096 (2.3%) 595,203 (2.9%) 47,712 (2.7%) 215,072 (1.9%)

1 n (%)

2 chi-squared test with Rao & Scott's second-order correction

  1. According to the percentages displayed in the tables produced in part d1, there are substantive differences the descriptive results between the analysis using no survey design and the analysis using survey design. Estimates produced by the analysis using survey design are slightly smaller across almost all variable levels.