library(SASxport)
library(naniar)
library(ggplot2)
library(gapminder)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(stringr)

Read Data

https://www.cdc.gov/brfss/annual_data/annual_data.htm

# Load the 2020 BRFSS data set from its SAS transport (XPORT) file.
BRFSS2020 <- read.xport("LLCP2020.XPT")
# Display the freshly loaded data frame.
BRFSS2020

https://www.cdc.gov/brfss/annual_data/2020/pdf/2020-calculated-variables-version4-508.pdf

This dataset contains 279 variables, listed below.

# List the name of every column in the raw survey data.
names(BRFSS2020)
##   [1] "X.STATE"   "FMONTH"    "IDATE"     "IMONTH"    "IDAY"      "IYEAR"    
##   [7] "DISPCODE"  "SEQNO"     "X.PSU"     "CTELENM1"  "PVTRESD1"  "COLGHOUS" 
##  [13] "STATERE1"  "CELPHONE"  "LADULT1"   "COLGSEX"   "NUMADULT"  "LANDSEX"  
##  [19] "NUMMEN"    "NUMWOMEN"  "RESPSLCT"  "SAFETIME"  "CTELNUM1"  "CELLFON5" 
##  [25] "CADULT1"   "CELLSEX"   "PVTRESD3"  "CCLGHOUS"  "CSTATE1"   "LANDLINE" 
##  [31] "HHADULT"   "SEXVAR"    "GENHLTH"   "PHYSHLTH"  "MENTHLTH"  "POORHLTH" 
##  [37] "HLTHPLN1"  "PERSDOC2"  "MEDCOST"   "CHECKUP1"  "EXERANY2"  "SLEPTIM1" 
##  [43] "CVDINFR4"  "CVDCRHD4"  "CVDSTRK3"  "ASTHMA3"   "ASTHNOW"   "CHCSCNCR" 
##  [49] "CHCOCNCR"  "CHCCOPD2"  "HAVARTH4"  "ADDEPEV3"  "CHCKDNY2"  "DIABETE4" 
##  [55] "DIABAGE3"  "LASTDEN4"  "RMVTETH4"  "MARITAL"   "EDUCA"     "RENTHOM1" 
##  [61] "NUMHHOL3"  "NUMPHON3"  "CPDEMO1B"  "VETERAN3"  "EMPLOY1"   "CHILDREN" 
##  [67] "INCOME2"   "PREGNANT"  "WEIGHT2"   "HEIGHT3"   "DEAF"      "BLIND"    
##  [73] "DECIDE"    "DIFFWALK"  "DIFFDRES"  "DIFFALON"  "SMOKE100"  "SMOKDAY2" 
##  [79] "STOPSMK2"  "LASTSMK2"  "USENOW3"   "ALCDAY5"   "AVEDRNK3"  "DRNK3GE5" 
##  [85] "MAXDRNKS"  "FLUSHOT7"  "FLSHTMY3"  "SHINGLE2"  "PNEUVAC4"  "FALL12MN" 
##  [91] "FALLINJ4"  "SEATBELT"  "DRNKDRI2"  "HADMAM"    "HOWLONG"   "HADPAP2"  
##  [97] "LASTPAP2"  "HPVTEST"   "HPLSTTST"  "HADHYST2"  "PCPSAAD3"  "PCPSADI1" 
## [103] "PCPSARE1"  "PSATEST1"  "PSATIME"   "PCPSARS1"  "COLNSCPY"  "COLNTEST" 
## [109] "SIGMSCPY"  "SIGMTEST"  "BLDSTOL1"  "LSTBLDS4"  "STOOLDNA"  "SDNATEST" 
## [115] "VIRCOLON"  "VCLNTEST"  "HIVTST7"   "HIVTSTD3"  "HIVRISK5"  "PDIABTST" 
## [121] "PREDIAB1"  "INSULIN1"  "BLDSUGAR"  "FEETCHK3"  "DOCTDIAB"  "CHKHEMO3" 
## [127] "FEETCHK"   "EYEEXAM1"  "DIABEYE"   "DIABEDU"   "TOLDCFS"   "HAVECFS"  
## [133] "WORKCFS"   "TOLDHEPC"  "TRETHEPC"  "PRIRHEPC"  "HAVEHEPC"  "HAVEHEPB" 
## [139] "MEDSHEPB"  "HLTHCVR1"  "CIMEMLOS"  "CDHOUSE"   "CDASSIST"  "CDHELP"   
## [145] "CDSOCIAL"  "CDDISCUS"  "CAREGIV1"  "CRGVREL4"  "CRGVLNG1"  "CRGVHRS1" 
## [151] "CRGVPRB3"  "CRGVALZD"  "CRGVPER1"  "CRGVHOU1"  "CRGVEXPT"  "ECIGARET" 
## [157] "ECIGNOW"   "MARIJAN1"  "USEMRJN2"  "RSNMRJN1"  "LCSFIRST"  "LCSLAST"  
## [163] "LCSNUMCG"  "LCSCTSCN"  "CNCRDIFF"  "CNCRAGE"   "CNCRTYP1"  "CSRVTRT3" 
## [169] "CSRVDOC1"  "CSRVSUM"   "CSRVRTRN"  "CSRVINST"  "CSRVINSR"  "CSRVDEIN" 
## [175] "CSRVCLIN"  "CSRVPAIN"  "CSRVCTL2"  "PCPSADE1"  "PCDMDEC1"  "HPVADVC4" 
## [181] "HPVADSHT"  "TETANUS1"  "IMFVPLA1"  "BIRTHSEX"  "SOMALE"    "SOFEMALE" 
## [187] "TRNSGNDR"  "ACEDEPRS"  "ACEDRINK"  "ACEDRUGS"  "ACEPRISN"  "ACEDIVRC" 
## [193] "ACEPUNCH"  "ACEHURT1"  "ACESWEAR"  "ACETOUCH"  "ACETTHEM"  "ACEHVSEX" 
## [199] "RCSGENDR"  "RCSRLTN2"  "CASTHDX2"  "CASTHNO2"  "QSTVER"    "QSTLANG"  
## [205] "X.METSTAT" "X.URBSTAT" "MSCODE"    "X.STSTR"   "X.STRWT"   "X.RAWRAKE"
## [211] "X.WT2RAKE" "X.IMPRACE" "X.CHISPNC" "X.CRACE1"  "X.CPRACE"  "X.CLLCPWT"
## [217] "X.DUALUSE" "X.DUALCOR" "X.LLCPWT2" "X.LLCPWT"  "X.RFHLTH"  "X.PHYS14D"
## [223] "X.MENT14D" "X.HCVU651" "X.TOTINDA" "X.MICHD"   "X.LTASTH1" "X.CASTHM1"
## [229] "X.ASTHMS1" "X.DRDXAR2" "X.EXTETH3" "X.ALTETH3" "X.DENVST3" "X.PRACE1" 
## [235] "X.MRACE1"  "X.HISPANC" "X.RACE"    "X.RACEG21" "X.RACEGR3" "X.RACEPRV"
## [241] "X.SEX"     "X.AGEG5YR" "X.AGE65YR" "X.AGE80"   "X.AGE.G"   "HTIN4"    
## [247] "HTM4"      "WTKG3"     "X.BMI5"    "X.BMI5CAT" "X.RFBMI5"  "X.CHLDCNT"
## [253] "X.EDUCAG"  "X.INCOMG"  "X.SMOKER3" "X.RFSMOK3" "DRNKANY5"  "DROCDY3." 
## [259] "X.RFBING5" "X.DRNKWK1" "X.RFDRHV7" "X.FLSHOT7" "X.PNEUMO3" "X.RFSEAT2"
## [265] "X.RFSEAT3" "X.DRNKDRV" "X.RFMAM22" "X.MAM5023" "X.RFPAP35" "X.RFPSA23"
## [271] "X.CLNSCPY" "X.SGMSCPY" "X.SGMS10Y" "X.RFBLDS4" "X.STOLDNA" "X.VIRCOLN"
## [277] "X.SBONTIM" "X.CRCREC1" "X.AIDTST4"

Choose 12 variables

# Keep the 12 analysis columns and give them readable names in one step:
# `select(new = old)` renames while selecting and preserves the listed order.
# The spellings `Weitht.KG` and `CornoaryHeartD` are kept as-is because later
# steps in this file refer to the columns by exactly these names.
# NOTE(review): the summary output further down shows Height.M around 170 and
# Weitht.KG around 8000, so these look like centimetres and hundredths of a
# kilogram rather than metres / kilograms -- confirm against the CDC codebook.
BRFSS2020 <- select(
  BRFSS2020,
  SEQNO,
  Date = IDATE,
  Sex = X.SEX,
  Age.CAT = X.AGEG5YR,
  Race.CAT = X.RACE,
  Height.M = HTM4,
  Weitht.KG = WTKG3,
  BMI.CAT = X.BMI5CAT,
  Smoker = X.SMOKER3,
  Drinking = DROCDY3.,
  Exercise = X.TOTINDA,
  CornoaryHeartD = X.MICHD
)
# Display the reduced data frame.
BRFSS2020

Variables

https://www.cdc.gov/brfss/annual_data/2020/pdf/2020-calculated-variables-version4-508.pdf

Age:

Sex:

Race:

BMI.CAT:

Smoker:

Drinking:

Exercise:

CornoaryHeartD:

`.` (Not asked or Missing): Respondents who reported they didn't know, refused, or had a missing value for the MI or CHD questions (CVDINFR4 = 7, 9, or missing; or CVDCRHD4 = 7, 9, or missing).

Filter out missing data

# Remove incomplete responses: first drop every row that has an NA in any
# column, then drop rows whose coded answer is the survey's
# "Don't know / Refused / Missing" sentinel value.
# Race.CAT is filtered here as well: without it, code 9 survives this step
# (the summary of the filtered data reports a Race.CAT maximum of 9).
BRFSS2020 <- BRFSS2020 %>%
  drop_na() %>%
  filter(
    Age.CAT != 14,   # 14 = Don't know, Refused, Missing
    Race.CAT != 9,   # 9 = Don't know, Not sure, Refused
    Smoker != 9,     # 9 = Don't know, Refused, Missing
    Drinking != 900, # 900 = Don't know, Refused, Missing
    Exercise != 9    # 9 = Don't know, Refused, Missing
  )

Recode the variables so that their values match their meanings

# Reverse the numeric coding of Exercise, Smoker and CornoaryHeartD.
# Each `case_when()` maps the listed codes explicitly and sends every other
# value to the fallback given on its `TRUE` line.
BRFSS2020 <- BRFSS2020 %>%
  mutate(
    # Two-level flip: 2 becomes 1, anything else becomes 2.
    Exercise = case_when(
      Exercise == 2 ~ 1,
      TRUE ~ 2
    ),
    # Four-level flip: 1 -> 4, 2 -> 3, 3 -> 2, anything else -> 1.
    Smoker = case_when(
      Smoker == 1 ~ 4,
      Smoker == 2 ~ 3,
      Smoker == 3 ~ 2,
      TRUE ~ 1
    ),
    # Two-level flip: 2 becomes 1, anything else becomes 2.
    CornoaryHeartD = case_when(
      CornoaryHeartD == 2 ~ 1,
      TRUE ~ 2
    )
  )
# Display the recoded data frame.
BRFSS2020

Smoker:

# Per-column summary of the cleaned data (counts for factors, quantiles and
# mean for numeric columns).
BRFSS2020 %>%
  summary()
##         SEQNO              Date             Sex           Age.CAT      
##  2020000297:    52   03172020:  1747   Min.   :1.000   Min.   : 1.000  
##  2020000946:    52   03122020:  1735   1st Qu.:1.000   1st Qu.: 5.000  
##  2020001059:    52   03162020:  1720   Median :2.000   Median : 8.000  
##  2020001159:    52   03112020:  1696   Mean   :1.525   Mean   : 7.563  
##  2020001679:    52   03182020:  1666   3rd Qu.:2.000   3rd Qu.:10.000  
##  2020000070:    51   11172020:  1639   Max.   :2.000   Max.   :13.000  
##  (Other)   :339022   (Other) :329130                                   
##     Race.CAT        Height.M       Weitht.KG        BMI.CAT          Smoker    
##  Min.   :1.000   Min.   : 91.0   Min.   : 2495   Min.   :1.000   Min.   :1.00  
##  1st Qu.:1.000   1st Qu.:163.0   1st Qu.: 6804   1st Qu.:2.000   1st Qu.:1.00  
##  Median :1.000   Median :170.0   Median : 7938   Median :3.000   Median :1.00  
##  Mean   :2.088   Mean   :170.3   Mean   : 8248   Mean   :2.985   Mean   :1.66  
##  3rd Qu.:1.000   3rd Qu.:178.0   3rd Qu.: 9299   3rd Qu.:4.000   3rd Qu.:2.00  
##  Max.   :9.000   Max.   :234.0   Max.   :29030   Max.   :4.000   Max.   :4.00  
##                                                                                
##     Drinking         Exercise     CornoaryHeartD 
##  Min.   :  0.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:  0.00   1st Qu.:2.000   1st Qu.:1.000  
##  Median :  3.00   Median :2.000   Median :1.000  
##  Mean   : 17.87   Mean   :1.768   Mean   :1.089  
##  3rd Qu.: 23.00   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :100.00   Max.   :2.000   Max.   :2.000  
## 

Plot data

# Proportional stacked columns: one column per CornoaryHeartD code, split by
# BMI category (BMI.CAT converted to a factor so it gets discrete colours).
# NOTE(review): `geom_col()` uses the y values as bar heights, so the shares
# shown are sums of the Smoker codes (1-4), not respondent counts -- confirm
# this is the intended quantity.
ggplot(
  BRFSS2020,
  aes(x = CornoaryHeartD, y = Smoker, fill = factor(BMI.CAT))
) +
  geom_col(position = "fill")