Working on the 2019 BRFSS here.
# Download the 2019 BRFSS SAS transport archive from the CDC and unpack it
# into the working directory.
# The archive is large, and download.file() gives up after getOption("timeout")
# seconds (60 by default), so raise the limit for this one call and restore
# the previous setting afterwards, even if the download fails.
old_opts <- options(timeout = max(600, getOption("timeout")))
tryCatch(
  download.file(
    "https://www.cdc.gov/brfss/annual_data/2019/files/LLCP2019XPT.zip",
    destfile = "LLCP2019XPT.zip",
    mode = "wb" # write the zip as binary on every platform
  ),
  finally = options(old_opts)
)
unzip("LLCP2019XPT.zip")
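Now read the data. Note that the call below does not pass make.names = FALSE,
so the munged column names (an "X" added in front of "_") are repaired right after reading.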
library(foreign)
# Read the unzipped SAS transport file into a data frame.
brfss <- read.xport("LLCP2019.XPT")
# Variable names that begin with "_" arrive with an "X" in front ("X_...").
# Strip that prefix so the names match the codebook. The pattern is anchored
# with ^ so that only a leading "X_" is rewritten, never an "X_" that happens
# to occur in the middle of a name.
colnames(brfss) <- sub("^X_", "_", colnames(brfss))
Finally, make a tibble.
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Convert the data frame to a tibble (same columns, tibble printing/semantics).
brfss <- as_tibble(brfss)
Start by scraping the visual codebook web page, which is full of HTML tables.
library(tidyverse)
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
library(httr)
# Fetch the HTML codebook and collect every <table> node on the page.
url <- "https://www.cdc.gov/brfss/annual_data/2019/pdf/codebook19_llcp-v2-508.HTML"
htmldat <- html_nodes(read_html(url), css = "table")

# Positions of the tables whose markup contains class="table".
idx <- which(grepl('class=\"table\"', htmldat))

# Parse each selected table node into a data frame, one list element per table.
codetables <- vector("list", length(idx))
for (k in seq_along(idx)) {
  codetables[[k]] <- html_table(htmldat[[idx[k]]])
}
The first table isn’t part of the codebook.
# Discard the first parsed table and keep all the others.
codetables <- tail(codetables, -1)
Now do some really ugly text parsing.
# Turn each scraped table into tidy rows of (code, label, variable, question).
# The top-left cell of every table packs the variable's metadata into one
# string; the variable name and question text are pulled out of it and
# attached to every remaining row of that table.
for (i in seq_along(codetables)) {
  # Splitting on the three field markers gives up to four pieces:
  # [1] text before "Variable Name:", [2] the variable name,
  # [3] the question prologue, [4] the question text.
  extr <- str_split_fixed(
    codetables[[i]][1, 1],
    "Variable\\sName:\\s|Question\\sPrologue:|Question:\\s+",
    4
  )
  # Plain assignment is used rather than magrittr's `%<>%`, which is not
  # attached by library(tidyverse) and would need library(magrittr).
  codetables[[i]] <- codetables[[i]] %>%
    mutate(
      varname = extr[2],
      varlabel = extr[4]
    ) %>%
    # Row 1 is the metadata cell parsed above; row 2 is presumably the table's
    # own column-heading row (TODO confirm against the page).
    slice(-1:-2) %>%
    # Drop columns X3 through X5, leaving X1 and X2.
    select(-X3:-X5)
}

# Stack the per-variable tables into a single codebook tibble.
codetab <- as_tibble(do.call(rbind, codetables))
At least we now have a nice, computable codebook:
# Show the first rows of the assembled codebook.
print(codetab)
## # A tibble: 1,942 x 4
## X1 X2 varname varlabel
## <chr> <chr> <chr> <chr>
## 1 1 Alabama _STATE State FIPS Code
## 2 2 Alaska _STATE State FIPS Code
## 3 4 Arizona _STATE State FIPS Code
## 4 5 Arkansas _STATE State FIPS Code
## 5 6 California _STATE State FIPS Code
## 6 8 Colorado _STATE State FIPS Code
## 7 9 Connecticut _STATE State FIPS Code
## 8 10 Delaware _STATE State FIPS Code
## 9 11 District of Columbia _STATE State FIPS Code
## 10 12 Florida _STATE State FIPS Code
## # … with 1,932 more rows
Make a list of all the variable names:
# Distinct variable names that appear in the codebook.
allvars <- unique(codetab$varname)
# Sanity check: every codebook variable should also be a column of brfss,
# so the summary should report TRUE only.
summary(is.element(allvars, colnames(brfss)))
## Mode TRUE
## logical 342
Now do the high-throughput recoding. Don’t recode variables that start with "_" (these seem to be decimal), those that say “decimal” in a value label (column X2 of the codebook), or those that only attempt to recode “BLANK” (these are already NA in this dataset).
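A number of the variables produce warnings about NAs introduced by coercion and about unreplaced values being treated as NA. For those variables, any value without a matching codebook entry is set to NA. They still seem to have useful recoding done, but merit double-checking if they’re important to your analysis.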
# Replace coded values in brfss with their codebook value labels, one
# variable at a time. Skipped: variables whose name starts with "_",
# variables with "decimal" in any value label, and variables whose only
# codebook entry is "BLANK".
# startsWith() is used so that only a leading "_" excludes a variable.
for (onevar in allvars[!startsWith(allvars, "_")]) {
  print(onevar)
  minicodebook <- filter(codetab, varname == onevar, X1 != "BLANK")
  if (any(grepl("decimal", minicodebook$X2))) next

  # Named vector: names are the codes (X1), values are the labels (X2).
  level_key <- minicodebook$X2
  names(level_key) <- as.character(minicodebook$X1)

  column <- brfss[[onevar]]
  if (is.numeric(column)) {
    # For a numeric column recode() coerces the codes to numbers. Entries that
    # are not a single number cannot be coerced (this is what caused the
    # "NAs introduced by coercion" warnings) and can never match a value, so
    # drop them up front.
    is_single_code <- grepl("^[0-9]+(\\.[0-9]+)?$", trimws(names(level_key)))
    level_key <- level_key[is_single_code]
    if (length(level_key) > 0) {
      # Without .default, every value that has no label is silently turned
      # into NA. Keep those values instead, as text; "%.15g" avoids
      # scientific notation for values such as 100000.
      brfss[[onevar]] <- recode(
        column,
        !!!level_key,
        .default = sprintf("%.15g", column)
      )
    }
  } else if (length(level_key) > 0) {
    # Non-numeric columns are recoded exactly as before.
    brfss[[onevar]] <- recode(column, !!!level_key)
  }
}
## [1] "FMONTH"
## [1] "IDATE"
## [1] "IMONTH"
## [1] "IDAY"
## [1] "IYEAR"
## [1] "DISPCODE"
## [1] "SEQNO"
## [1] "CTELENM1"
## [1] "PVTRESD1"
## [1] "COLGHOUS"
## [1] "STATERE1"
## [1] "CELPHONE"
## [1] "LADULT1"
## [1] "COLGSEX"
## [1] "NUMADULT"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
## [1] "LANDSEX"
## [1] "NUMMEN"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "NUMWOMEN"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "RESPSLCT"
## [1] "SAFETIME"
## [1] "CTELNUM1"
## [1] "CELLFON5"
## [1] "CADULT1"
## [1] "CELLSEX"
## [1] "PVTRESD3"
## [1] "CCLGHOUS"
## [1] "CSTATE1"
## [1] "LANDLINE"
## [1] "HHADULT"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "SEXVAR"
## [1] "GENHLTH"
## [1] "PHYSHLTH"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "MENTHLTH"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "POORHLTH"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "HLTHPLN1"
## [1] "PERSDOC2"
## [1] "MEDCOST"
## [1] "CHECKUP1"
## [1] "BPHIGH4"
## [1] "BPMEDS"
## [1] "CHOLCHK2"
## [1] "TOLDHI2"
## [1] "CHOLMED2"
## [1] "CVDINFR4"
## [1] "CVDCRHD4"
## [1] "CVDSTRK3"
## [1] "ASTHMA3"
## [1] "ASTHNOW"
## [1] "CHCSCNCR"
## [1] "CHCOCNCR"
## [1] "CHCCOPD2"
## [1] "ADDEPEV3"
## [1] "CHCKDNY2"
## [1] "DIABETE4"
## [1] "DIABAGE3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "HAVARTH4"
## [1] "ARTHEXER"
## [1] "ARTHEDU"
## [1] "LMTJOIN3"
## [1] "ARTHDIS2"
## [1] "JOINPAI2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "MARITAL"
## [1] "EDUCA"
## [1] "RENTHOM1"
## [1] "NUMHHOL3"
## [1] "NUMPHON3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "CPDEMO1B"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "VETERAN3"
## [1] "EMPLOY1"
## [1] "CHILDREN"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "INCOME2"
## [1] "WEIGHT2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "HEIGHT3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "PREGNANT"
## [1] "DEAF"
## [1] "BLIND"
## [1] "DECIDE"
## [1] "DIFFWALK"
## [1] "DIFFDRES"
## [1] "DIFFALON"
## [1] "SMOKE100"
## [1] "SMOKDAY2"
## [1] "STOPSMK2"
## [1] "LASTSMK2"
## [1] "USENOW3"
## [1] "ALCDAY5"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "AVEDRNK3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "DRNK3GE5"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "MAXDRNKS"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "EXERANY2"
## [1] "EXRACT11"
## [1] "EXEROFT1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "EXERHMM1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "EXRACT21"
## [1] "EXEROFT2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "EXERHMM2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "STRENGTH"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FRUIT2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FRUITJU2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FVGREEN1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FRENCHF1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "POTATOE1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "VEGETAB2"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FLUSHOT7"
## [1] "FLSHTMY3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "TETANUS1"
## [1] "PNEUVAC4"
## [1] "HIVTST7"
## [1] "HIVTSTD3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "HIVRISK5"
## [1] "PDIABTST"
## [1] "PREDIAB1"
## [1] "INSULIN1"
## [1] "BLDSUGAR"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FEETCHK3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "DOCTDIAB"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "CHKHEMO3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "FEETCHK"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "EYEEXAM1"
## [1] "DIABEYE"
## [1] "DIABEDU"
## [1] "TOLDCFS"
## [1] "HAVECFS"
## [1] "WORKCFS"
## [1] "TOLDHEPC"
## [1] "TRETHEPC"
## [1] "PRIRHEPC"
## [1] "HAVEHEPC"
## [1] "HAVEHEPB"
## [1] "MEDSHEPB"
## [1] "HPVADVC3"
## [1] "HPVADSHT"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "IMFVPLA1"
## [1] "SHINGLE2"
## [1] "LCSFIRST"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "LCSLAST"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "LCSNUMCG"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "LCSCTSCN"
## [1] "HADMAM"
## [1] "HOWLONG"
## [1] "HADPAP2"
## [1] "LASTPAP2"
## [1] "HPVTEST"
## [1] "HPLSTTST"
## [1] "HADHYST2"
## [1] "PCPSAAD3"
## [1] "PCPSADI1"
## [1] "PCPSARE1"
## [1] "PSATEST1"
## [1] "PSATIME"
## [1] "PCPSARS1"
## [1] "PCPSADE1"
## [1] "PCDMDEC1"
## [1] "BLDSTOOL"
## [1] "LSTBLDS3"
## [1] "HADSIGM3"
## [1] "HADSGCO1"
## [1] "LASTSIG3"
## [1] "CNCRDIFF"
## [1] "CNCRAGE"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "CNCRTYP1"
## [1] "CSRVTRT3"
## [1] "CSRVDOC1"
## [1] "CSRVSUM"
## [1] "CSRVRTRN"
## [1] "CSRVINST"
## [1] "CSRVINSR"
## [1] "CSRVDEIN"
## [1] "CSRVCLIN"
## [1] "CSRVPAIN"
## [1] "CSRVCTL2"
## [1] "HLTHCVR1"
## [1] "ASPIRIN"
## [1] "HOMBPCHK"
## [1] "HOMRGCHK"
## [1] "WHEREBP"
## [1] "SHAREBP"
## [1] "WTCHSALT"
## [1] "DRADVISE"
## [1] "INDORTAN"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "NUMBURN3"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "SUNPRTCT"
## [1] "WKDAYOUT"
## [1] "WKENDOUT"
## [1] "CIMEMLOS"
## [1] "CDHOUSE"
## [1] "CDASSIST"
## [1] "CDHELP"
## [1] "CDSOCIAL"
## [1] "CDDISCUS"
## [1] "CAREGIV1"
## [1] "CRGVREL3"
## [1] "CRGVLNG1"
## [1] "CRGVHRS1"
## [1] "CRGVPRB3"
## [1] "CRGVALZD"
## [1] "CRGVPER1"
## [1] "CRGVHOU1"
## [1] "CRGVEXPT"
## [1] "ACEDEPRS"
## [1] "ACEDRINK"
## [1] "ACEDRUGS"
## [1] "ACEPRISN"
## [1] "ACEDIVRC"
## [1] "ACEPUNCH"
## [1] "ACEHURT1"
## [1] "ACESWEAR"
## [1] "ACETOUCH"
## [1] "ACETTHEM"
## [1] "ACEHVSEX"
## [1] "PFPPRVN3"
## [1] "TYPCNTR8"
## [1] "NOBCUSE7"
## [1] "ASBIALCH"
## [1] "ASBIDRNK"
## [1] "ASBIBING"
## [1] "ASBIADVC"
## [1] "ASBIRDUC"
## [1] "MARIJAN1"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "USEMRJN2"
## [1] "RSNMRJN1"
## [1] "FOODSTMP"
## [1] "BIRTHSEX"
## [1] "SOMALE"
## [1] "SOFEMALE"
## [1] "TRNSGNDR"
## [1] "RCSGENDR"
## [1] "RCSRLTN2"
## [1] "CASTHDX2"
## [1] "CASTHNO2"
## [1] "QSTVER"
## [1] "QSTLANG"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "MSCODE"
## [1] "HTIN4"
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): NAs introduced by
## coercion
## Warning in recode.numeric(brfss[[onevar]], !!!level_key): Unreplaced values
## treated as NA as .x is not compatible. Please specify replacements exhaustively
## or supply .default
## [1] "HTM4"
## [1] "WTKG3"
## [1] "DRNKANY5"
# Write the recoded brfss object to disk so it can be restored with load().
save(list = "brfss", file = "brfss.rda")
Here is a Dropbox link to the recoded tibble brfss.rda, which you can load with load("brfss.rda").