Let’s load the packages we will be using for our analysis.

library(tidyverse)
library(haven)
library(janitor)
library(descriptio)
#install.packages("weights")
library(weights)
#install.packages("kableExtra")
library(kableExtra)
library(totalcensus)
#install.packages("cdlTools")
library(cdlTools)

Now we read in the data

# Read the 2016 LLCP SAS transport (.XPT) file into a tibble.
df_2016 <- haven::read_xpt(file = "./LLCP2016.XPT_")

# List every column name so we can choose the variables to keep.
names(df_2016)
##   [1] "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"     "IYEAR"   
##   [7] "DISPCODE" "SEQNO"    "_PSU"     "CTELENM1" "PVTRESD1" "COLGHOUS"
##  [13] "STATERES" "CELLFON4" "LADULT"   "NUMADULT" "NUMMEN"   "NUMWOMEN"
##  [19] "CTELNUM1" "CELLFON5" "CADULT"   "PVTRESD3" "CCLGHOUS" "CSTATE1" 
##  [25] "LANDLINE" "HHADULT"  "GENHLTH"  "PHYSHLTH" "MENTHLTH" "POORHLTH"
##  [31] "HLTHPLN1" "PERSDOC2" "MEDCOST"  "CHECKUP1" "EXERANY2" "SLEPTIM1"
##  [37] "CVDINFR4" "CVDCRHD4" "CVDSTRK3" "ASTHMA3"  "ASTHNOW"  "CHCSCNCR"
##  [43] "CHCOCNCR" "CHCCOPD1" "HAVARTH3" "ADDEPEV2" "CHCKIDNY" "DIABETE3"
##  [49] "DIABAGE2" "LASTDEN3" "RMVTETH3" "SEX"      "MARITAL"  "EDUCA"   
##  [55] "RENTHOM1" "NUMHHOL2" "NUMPHON2" "CPDEMO1"  "VETERAN3" "EMPLOY1" 
##  [61] "CHILDREN" "INCOME2"  "INTERNET" "WEIGHT2"  "HEIGHT3"  "PREGNANT"
##  [67] "DEAF"     "BLIND"    "DECIDE"   "DIFFWALK" "DIFFDRES" "DIFFALON"
##  [73] "SMOKE100" "SMOKDAY2" "STOPSMK2" "LASTSMK2" "USENOW3"  "ECIGARET"
##  [79] "ECIGNOW"  "ALCDAY5"  "AVEDRNK2" "DRNK3GE5" "MAXDRNKS" "FLUSHOT6"
##  [85] "FLSHTMY2" "PNEUVAC3" "TETANUS"  "FALL12MN" "FALLINJ2" "SEATBELT"
##  [91] "DRNKDRI2" "HADMAM"   "HOWLONG"  "HADPAP2"  "LASTPAP2" "HPVTEST" 
##  [97] "HPLSTTST" "HADHYST2" "PCPSAAD2" "PCPSADI1" "PCPSARE1" "PSATEST1"
## [103] "PSATIME"  "PCPSARS1" "BLDSTOOL" "LSTBLDS3" "HADSIGM3" "HADSGCO1"
## [109] "LASTSIG3" "HIVTST6"  "HIVTSTD3" "HIVRISK4" "PDIABTST" "PREDIAB1"
## [115] "INSULIN"  "BLDSUGAR" "FEETCHK2" "DOCTDIAB" "CHKHEMO3" "FEETCHK" 
## [121] "EYEEXAM"  "DIABEYE"  "DIABEDU"  "PAINACT2" "QLMENTL2" "QLSTRES2"
## [127] "QLHLTH2"  "MEDICARE" "HLTHCVR1" "DELAYMED" "DLYOTHER" "NOCOV121"
## [133] "LSTCOVRG" "DRVISITS" "MEDSCOST" "CARERCVD" "MEDBILL1" "MEDADVIC"
## [139] "UNDRSTND" "WRITTEN"  "CAREGIV1" "CRGVREL1" "CRGVLNG1" "CRGVHRS1"
## [145] "CRGVPRB2" "CRGVPERS" "CRGVHOUS" "CRGVMST2" "CRGVEXPT" "CIMEMLOS"
## [151] "CDHOUSE"  "CDASSIST" "CDHELP"   "CDSOCIAL" "CDDISCUS" "SSBSUGR2"
## [157] "SSBFRUT2" "CALRINFO" "MARIJANA" "USEMRJNA" "ASTHMAGE" "ASATTACK"
## [163] "ASERVIST" "ASDRVIST" "ASRCHKUP" "ASACTLIM" "ASYMPTOM" "ASNOSLEP"
## [169] "ASTHMED3" "ASINHALR" "IMFVPLAC" "HPVADVC2" "HPVADSHT" "SHINGLE2"
## [175] "NUMBURN2" "CNCRDIFF" "CNCRAGE"  "CNCRTYP1" "CSRVTRT1" "CSRVDOC1"
## [181] "CSRVSUM"  "CSRVRTRN" "CSRVINST" "CSRVINSR" "CSRVDEIN" "CSRVCLIN"
## [187] "CSRVPAIN" "CSRVCTL1" "PROFEXAM" "LENGEXAM" "PCPSADE1" "PCDMDECN"
## [193] "SXORIENT" "TRNSGNDR" "RCSGENDR" "RCSRLTN2" "CASTHDX2" "CASTHNO2"
## [199] "EMTSUPRT" "LSATISFY" "QLACTLM2" "USEEQUIP" "QSTVER"   "QSTLANG" 
## [205] "MSCODE"   "_STSTR"   "_STRWT"   "_RAWRAKE" "_WT2RAKE" "_CHISPNC"
## [211] "_CRACE1"  "_CPRACE"  "_CLLCPWT" "_DUALUSE" "_DUALCOR" "_LLCPWT2"
## [217] "_LLCPWT"  "_RFHLTH"  "_PHYS14D" "_MENT14D" "_HCVU651" "_TOTINDA"
## [223] "_MICHD"   "_LTASTH1" "_CASTHM1" "_ASTHMS1" "_DRDXAR1" "_EXTETH2"
## [229] "_ALTETH2" "_DENVST2" "_PRACE1"  "_MRACE1"  "_HISPANC" "_RACE"   
## [235] "_RACEG21" "_RACEGR3" "_RACE_G1" "_AGEG5YR" "_AGE65YR" "_AGE80"  
## [241] "_AGE_G"   "HTIN4"    "HTM4"     "WTKG3"    "_BMI5"    "_BMI5CAT"
## [247] "_RFBMI5"  "_CHLDCNT" "_EDUCAG"  "_INCOMG"  "_SMOKER3" "_RFSMOK3"
## [253] "_ECIGSTS" "_CURECIG" "DRNKANY5" "DROCDY3_" "_RFBING5" "_DRNKWEK"
## [259] "_RFDRHV5" "_FLSHOT6" "_PNEUMO2" "_RFSEAT2" "_RFSEAT3" "_DRNKDRV"
## [265] "_RFMAM2Y" "_MAM5021" "_RFPAP33" "_RFPSA21" "_RFBLDS3" "_COL10YR"
## [271] "_HFOB3YR" "_FS5YR"   "_FOBTFS"  "_CRCREC"  "_AIDTST3"
# Keep only the 24 columns used in this analysis: state, disposition, sex,
# health-status and chronic-condition items, race/ethnicity groupings, and
# the sampling-design and weight variables.
df2_2016 <- df_2016 %>%
  select(all_of(c(
    "_STATE", "DISPCODE", "SEX", "GENHLTH", "PHYSHLTH", "_PHYS14D",
    "MENTHLTH", "_MENT14D", "ADDEPEV2", "DIABETE3", "HLTHPLN1",
    "ASTHMA3", "_LTASTH1", "_MICHD", "CHCSCNCR", "CHCOCNCR",
    "_HISPANC", "_RACE", "_RACEGR3", "_PSU", "_STSTR",
    "_STRWT", "_LLCPWT", "_LLCPWT2"
  )))

Let’s look at the structure of our data

# Show each column's type, first few values, and label attribute.
df2_2016 %>% str()
## tibble [486,303 × 24] (S3: tbl_df/tbl/data.frame)
##  $ _STATE  : num [1:486303] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "STATE FIPS CODE"
##  $ DISPCODE: num [1:486303] 1100 1100 1100 1100 1100 1100 1100 1100 1100 1100 ...
##   ..- attr(*, "label")= chr "FINAL DISPOSITION"
##  $ SEX     : num [1:486303] 1 2 2 1 1 2 2 2 2 1 ...
##   ..- attr(*, "label")= chr "RESPONDENTS SEX"
##  $ GENHLTH : num [1:486303] 1 2 3 3 5 3 3 3 2 2 ...
##   ..- attr(*, "label")= chr "GENERAL HEALTH"
##  $ PHYSHLTH: num [1:486303] 88 88 88 88 10 88 88 88 88 88 ...
##   ..- attr(*, "label")= chr "NUMBER OF DAYS PHYSICAL HEALTH NOT GOOD"
##  $ _PHYS14D: num [1:486303] 1 1 1 1 2 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "COMPUTED PHYSICAL HEALTH STATUS"
##  $ MENTHLTH: num [1:486303] 88 88 1 88 3 88 88 88 88 88 ...
##   ..- attr(*, "label")= chr "NUMBER OF DAYS MENTAL HEALTH NOT GOOD"
##  $ _MENT14D: num [1:486303] 1 1 2 1 2 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "COMPUTED MENTAL HEALTH STATUS"
##  $ ADDEPEV2: num [1:486303] 2 1 2 2 2 2 2 1 2 2 ...
##   ..- attr(*, "label")= chr "EVER TOLD YOU HAD A DEPRESSIVE DISORDER"
##  $ DIABETE3: num [1:486303] 3 3 3 1 3 3 3 1 3 3 ...
##   ..- attr(*, "label")= chr "(EVER TOLD) YOU HAVE DIABETES"
##  $ HLTHPLN1: num [1:486303] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "HAVE ANY HEALTH CARE COVERAGE"
##  $ ASTHMA3 : num [1:486303] 2 2 2 2 1 2 2 1 2 2 ...
##   ..- attr(*, "label")= chr "EVER TOLD HAD ASTHMA"
##  $ _LTASTH1: num [1:486303] 1 1 1 1 2 1 1 2 1 1 ...
##   ..- attr(*, "label")= chr "LIFETIME ASTHMA CALCULATED VARIABLE"
##  $ _MICHD  : num [1:486303] 2 2 1 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "RESPONDENTS THAT HAVE EVER REPORTED HAVI"
##  $ CHCSCNCR: num [1:486303] 2 2 1 1 2 2 1 1 2 2 ...
##   ..- attr(*, "label")= chr "(EVER TOLD) YOU HAD SKIN CANCER?"
##  $ CHCOCNCR: num [1:486303] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "(EVER TOLD) YOU HAD ANY OTHER TYPES OF C"
##  $ _HISPANC: num [1:486303] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "label")= chr "HISPANIC, LATINO/A, OR SPANISH ORIGIN CA"
##  $ _RACE   : num [1:486303] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "COMPUTED RACE-ETHNICITY GROUPING"
##  $ _RACEGR3: num [1:486303] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "COMPUTED FIVE LEVEL RACE/ETHNICITY CATEG"
##  $ _PSU    : num [1:486303] 2.02e+09 2.02e+09 2.02e+09 2.02e+09 2.02e+09 ...
##   ..- attr(*, "label")= chr "PRIMARY SAMPLING UNIT"
##  $ _STSTR  : num [1:486303] 11011 11011 11011 11011 11011 ...
##   ..- attr(*, "label")= chr "SAMPLE DESIGN STRATIFICATION VARIABLE"
##  $ _STRWT  : num [1:486303] 37.4 37.4 37.4 37.4 37.4 ...
##   ..- attr(*, "label")= chr "STRATUM WEIGHT"
##  $ _LLCPWT : num [1:486303] 768 330 291 211 1583 ...
##   ..- attr(*, "label")= chr "FINAL WEIGHT: LAND-LINE AND CELL-PHONE D"
##  $ _LLCPWT2: num [1:486303] 395 395 197 395 592 ...
##   ..- attr(*, "label")= chr "TRUNCATED DESIGN WEIGHT USED IN ADULT CO"
# Standardise the column names with janitor (lower case, leading underscore
# dropped), then list the cleaned names.
df2_2016 <- janitor::clean_names(df2_2016)
colnames(df2_2016)
##  [1] "state"    "dispcode" "sex"      "genhlth"  "physhlth" "phys14d" 
##  [7] "menthlth" "ment14d"  "addepev2" "diabete3" "hlthpln1" "asthma3" 
## [13] "ltasth1"  "michd"    "chcscncr" "chcocncr" "hispanc"  "race"    
## [19] "racegr3"  "psu"      "ststr"    "strwt"    "llcpwt"   "llcpwt2"

The results show a tibble (data frame) with 486,303 records and 24 variables of interest. Another point of interest is the variable names, which are a tad messy: several begin with an underscore and all are in upper case. We can clean those using the janitor package.

# Normalise the column names with janitor's clean_names(); the call is safe
# to repeat on names that are already clean.
df2_2016 <- df2_2016 %>%
  clean_names()

# Confirm the resulting column names.
colnames(df2_2016)
##  [1] "state"    "dispcode" "sex"      "genhlth"  "physhlth" "phys14d" 
##  [7] "menthlth" "ment14d"  "addepev2" "diabete3" "hlthpln1" "asthma3" 
## [13] "ltasth1"  "michd"    "chcscncr" "chcocncr" "hispanc"  "race"    
## [19] "racegr3"  "psu"      "ststr"    "strwt"    "llcpwt"   "llcpwt2"

We will start our analysis by performing basic exploratory data analysis on the variables of interest and then running cross-tabulations.