# Load required packages
library(tidyr)
library(tidyverse)
library(haven)
library(knitr)
library(kableExtra)
library(plotly)
library(broom)
library(car)
library(ggeffects)
library(gtsummary)
library(ggstats)
library(janitor)
library(broom.helpers)
library(ggplot2)
library(survey)
library(purrr)
library(stringr)The BRFSS is a large-scale telephone survey that collects data on health-related risk behaviors, chronic health conditions, and use of preventive services from U.S. residents.
# Load the full BRFSS 2024 dataset
brfss_full <- read_xpt("LLCP2024.XTP") |>
janitor::clean_names()
# Display dataset dimensions
names(brfss_full)## [1] "state" "fmonth" "idate" "imonth" "iday" "iyear"
## [7] "dispcode" "seqno" "psu" "ctelenm1" "pvtresd1" "colghous"
## [13] "statere1" "celphon1" "ladult1" "numadult" "respslc1" "landsex3"
## [19] "safetime" "ctelnum1" "cellfon5" "cadult1" "cellsex3" "pvtresd3"
## [25] "cclghous" "cstate1" "landline" "hhadult" "sexvar" "genhlth"
## [31] "physhlth" "menthlth" "poorhlth" "primins2" "persdoc3" "medcost1"
## [37] "checkup1" "exerany2" "lastden4" "rmvteth4" "cvdinfr4" "cvdcrhd4"
## [43] "cvdstrk3" "asthma3" "asthnow" "chcscnc1" "chcocnc1" "chccopd3"
## [49] "addepev3" "chckdny2" "havarth4" "diabete4" "diabage4" "marital"
## [55] "educa" "renthom1" "numhhol4" "numphon4" "cpdemo1c" "veteran3"
## [61] "employ1" "children" "income3" "pregnant" "weight2" "height3"
## [67] "deaf" "blind" "decide" "diffwalk" "diffdres" "diffalon"
## [73] "hadmam" "howlong" "cervscrn" "crvclcnc" "crvclpap" "crvclhpv"
## [79] "hadhyst2" "hadsigm4" "colnsigm" "colntes1" "sigmtes1" "lastsig4"
## [85] "colncncr" "vircolo1" "vclntes2" "smalstol" "stoltest" "stooldn2"
## [91] "bldstfit" "sdnates1" "smoke100" "smokday2" "usenow3" "ecignow3"
## [97] "lcsfirst" "lcsnumcg" "lcsctsc1" "lcsscncr" "lcsctwhn" "alcday4"
## [103] "avedrnk4" "drnk3ge5" "maxdrnks" "flushot7" "flshtmy3" "imfvpla5"
## [109] "pneuvac4" "hivtst7" "hivtstd3" "hivrisk5" "pdiabts1" "prediab2"
## [115] "diabtype" "insulin1" "chkhemo3" "eyeexam1" "diabeye1" "diabedu1"
## [121] "feetsore" "arthexer" "shingle2" "hpvadvc4" "hpvadsh1" "tetanus1"
## [127] "cncrdiff" "cncrage" "cncrtyp2" "csrvtrt3" "csrvdoc1" "csrvsum"
## [133] "csrvrtrn" "csrvinst" "csrvinsr" "csrvdein" "csrvclin" "csrvpain"
## [139] "csrvctl2" "psatest1" "psatime1" "pcpsars2" "psasugs1" "pcstalk2"
## [145] "cimemlo1" "cdworry" "cddiscu1" "cdhous1" "cdsocia1" "caregiv1"
## [151] "crgvrel5" "crgvprb4" "crgvalzd" "crgvnurs" "crgvper2" "crgvhou2"
## [157] "crgvhrs2" "crgvlng2" "acedeprs" "acedrink" "acedrugs" "aceprisn"
## [163] "acedivrc" "acepunch" "acehurt1" "aceswear" "acetouch" "acetthem"
## [169] "acehvsex" "aceadsaf" "aceadned" "lsatisfy" "emtsuprt" "sdlonely"
## [175] "sdhemply" "foodstmp" "sdhfood1" "sdhbills" "sdhutils" "sdhtrnsp"
## [181] "howsafe1" "marijan1" "marjsmok" "marjeat" "marjvape" "marjdab"
## [187] "marjothr" "usemrjn4" "lastsmk2" "stopsmk2" "mentcigs" "mentecig"
## [193] "heattbco" "ssbsugr2" "ssbfrut3" "firearm5" "gunload" "loadulk2"
## [199] "rcsgend1" "rcsxbrth" "rcsrltn2" "casthdx2" "casthno2" "somale"
## [205] "sofemale" "hadsex" "pfpprvn4" "typcntr9" "nobcuse8" "qstver"
## [211] "qstlang" "hpvdsht" "icfqstvr" "metstat" "urbstat" "mscode"
## [217] "ststr" "strwt" "rawrake" "wt2rake" "imprace" "chispnc"
## [223] "crace1" "cageg" "cllcpwt" "dualuse" "dualcor" "llcpwt2"
## [229] "llcpwt" "rfhlth" "phys14d" "ment14d" "hlthpl2" "hcvu654"
## [235] "totinda" "exteth3" "alteth3" "denvst3" "michd" "ltasth1"
## [241] "casthm1" "asthms1" "drdxar2" "mrace1" "hispanc" "race"
## [247] "raceg21" "racegr3" "raceprv" "sex" "ageg5yr" "age65yr"
## [253] "age80" "age_g" "htin4" "htm4" "wtkg3" "bmi5"
## [259] "bmi5cat" "rfbmi5" "chldcnt" "educag" "incomg1" "rfmam23"
## [265] "mam402y" "crvscrn" "rfpap37" "hpv5yr1" "paphpv1" "hadcoln"
## [271] "clnscp2" "hadsigm" "sgmscp2" "sgms102" "rfblds6" "stoldn2"
## [277] "vircol2" "sbonti2" "crcrec3" "smoker3" "rfsmok3" "cureci3"
## [283] "lcslast" "lcsnumc" "lcsage" "lcsysmk" "packday" "packyrs"
## [289] "lcsyqts" "lcssmkg" "lcselig" "lcsctsn" "lcspstf" "drnkany6"
## [295] "drocdy4" "rfbing6" "drnkwk3" "rfdrhv9" "flshot7" "pneumo3"
## [301] "aidtst4"
# Display full dataset dimensions
cat("Full Data Dimensions:",
nrow(brfss_full), "observations,",
ncol(brfss_full), "variables\n")## Full Data Dimensions: 457670 observations, 301 variables
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_full), ncol(brfss_full))) |>
kable(caption = "Full Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 457670 |
| Variables | 301 |
# Select variables of interest and create analytic dataset
brfss_subset <- brfss_full |>
select(
# Outcome: Have you ever had a cervical cancer screening test
cervscrn,
# Demographics
ageg5yr, # Age category, fourteen-level age category
sexvar, # Sex
race, # Race/ethnicity
educag, # Education level
employ1, # Employment status
# Health status
genhlth, # General health
incomg1, # Income category
urbstat, # Urban/Rural Status
primins2, # Primary source of health care coverage
persdoc3, # Personal health care provider
medcost1, # Delayed care due to cost
checkup1, # Time since last routine checkup
llcpwt,
ststr,
psu
)
# Display full dataset dimensions
cat("Full Subset Dimensions:",
nrow(brfss_subset), "observations,",
ncol(brfss_subset), "variables\n")## Full Subset Dimensions: 457670 observations, 16 variables
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_subset), ncol(brfss_subset))) |>
kable(caption = "Full Subset Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 457670 |
| Variables | 16 |
# Filter for only females
brfss_female <- brfss_subset |>
filter(
sexvar == 2) # 2 = Female in BRFSS
# Display first exclusion dataset dimensions
names(brfss_female)## [1] "cervscrn" "ageg5yr" "sexvar" "race" "educag" "employ1"
## [7] "genhlth" "incomg1" "urbstat" "primins2" "persdoc3" "medcost1"
## [13] "checkup1" "llcpwt" "ststr" "psu"
# Display first exclusion dataset dimensions
cat("First Exclusion Data dimensions:",
nrow(brfss_female), "observations,",
ncol(brfss_female), "variables\n")## First Exclusion Data dimensions: 240183 observations, 16 variables
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_female), ncol(brfss_female))) |>
kable(caption = "First Exclusion Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 240183 |
| Variables | 16 |
# Filter for age (25-64)
brfss_filtered <- brfss_female |>
filter(
ageg5yr %in% 2:9) # 1–9 correspond to ages 25–64
# Display second exclusion dataset dimensions
names(brfss_filtered)## [1] "cervscrn" "ageg5yr" "sexvar" "race" "educag" "employ1"
## [7] "genhlth" "incomg1" "urbstat" "primins2" "persdoc3" "medcost1"
## [13] "checkup1" "llcpwt" "ststr" "psu"
# Display second exclusion dataset dimensions
cat("Second Exclusion Data dimensions:",
nrow(brfss_filtered), "observations,",
ncol(brfss_filtered), "variables\n")## Second Exclusion Data dimensions: 128221 observations, 16 variables
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_filtered), ncol(brfss_filtered))) |>
kable(caption = "Second Exclusion Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 128221 |
| Variables | 16 |
# Recode variables
brfss_recode <- brfss_filtered |>
mutate(
cervicalscr_bin = ifelse(cervscrn == 1, 1, 0),
cervicalscr_fac = factor(cervicalscr_bin,
levels = c(0, 1),
labels = c("Not screened", "Screened")),
age = factor(case_when(
ageg5yr == 2 ~ "25-29",
ageg5yr == 3 ~ "30-34",
ageg5yr == 4 ~ "35-39",
ageg5yr == 5 ~ "40-44",
ageg5yr == 6 ~ "45-49",
ageg5yr == 7 ~ "50-54",
ageg5yr == 8 ~ "55-59",
ageg5yr == 9 ~ "60-64"),
levels = c("25-29","30-34","35-39","40-44", "45-49","50-54","55-59","60-64")),
sex = factor("Female",
levels = "Female"),
education = factor(case_when(
educag == 1 ~ "Less than high school",
educag == 2 ~ "High school graduate",
educag == 3 ~ "Some college",
educag == 4 ~ "College graduate"),
levels = c("Less than high school", "High school graduate",
"Some college", "College graduate")),
race_cat = factor(case_when(
race == 1 ~ "White",
race == 2 ~ "Black",
race == 3 ~ "American Indian or Alaska Native",
race == 4 ~ "Asian",
race == 5 ~ "Native Hawaiian or other Pacific Islander",
race == 7 ~ "Multiracial",
race == 8 ~ "Hispanic"),
levels = c("White",
"Black",
"American Indian or Alaska Native",
"Asian",
"Native Hawaiian or other Pacific Islander",
"Multiracial",
"Hispanic")),
employ1_cat = factor(case_when(
employ1 == 1 ~ "Employed for wages",
employ1 == 2 ~ "Self-employed",
employ1 == 3 ~ "Out of work for 1 year or more",
employ1 == 4 ~ "Out of work for less than 1 year",
employ1 == 5 ~ "A homemaker",
employ1 == 6 ~ "A student",
employ1 == 7 ~ "Retired",
employ1 == 8 ~ "Unable to work"),
levels = c("Employed for wages", "Self-employed",
"Out of work for 1 year or more", "Out of work for less than 1 year",
"A homemaker", "A student", "Retired", "Unable to work")),
gen_health = factor(case_when(
genhlth == 1 ~ "Excellent",
genhlth == 2 ~ "Very Good",
genhlth == 3 ~ "Good",
genhlth == 4 ~ "Fair",
genhlth == 5 ~ "Poor"),
levels = c("Excellent", "Very Good", "Good", "Fair", "Poor")),
income = factor(case_when(
incomg1 == 1 ~ "Less than $15,000",
incomg1 == 2 ~ "$15,000 to < $25,000",
incomg1 == 3 ~ "$25,000 to < $35,000",
incomg1 == 4 ~ "$35,000 to < $50,000",
incomg1 == 5 ~ "$50,000 to < $100,000",
incomg1 == 6 ~ "$100,000 to < $200,000",
incomg1 == 7 ~ "More than $200,000"),
levels = c("Less than $15,000", "$15,000 to < $25,000", "$25,000 to < $35,000", "$35,000 to < $50,000", "$50,000 to < $100,000", "$100,000 to < $200,000", "More than $200,000")),
urbstat1 = factor(case_when(
urbstat == 1 ~ "Urban County",
urbstat == 2 ~ "Rural County"),
levels = c("Urban County", "Rural County")),
priminsur = factor(case_when(
primins2 == 1 ~ "Employer/Union Insurance",
primins2 == 2 ~ "Private Nongovernmental Insurance",
primins2 == 3 ~ "Medicare",
primins2 == 4 ~ "Medigap",
primins2 == 5 ~ "Medicaid",
primins2 == 6 ~ "Children's Health Insurance Program",
primins2 == 7 ~ "Military Related Health Care",
primins2 == 8 ~ "Indian Health Service",
primins2 == 9 ~ "State Sponsored Health Plan",
primins2 == 10 ~ "Other Governmental Program",
primins2 == 88 ~ "No Coverage of Any Type"),
levels = c("Employer/Union Insurance", "Private Nongovernmental Insurance", "Medicare", "Medigap", "Medicaid", "Children's Health Insurance Program", "Military Related Health Care", "Indian Health Service", "State Sponsored Health Plan", "Other Governmental Program", "No Coverage of Any Type")),
persdoc = factor(case_when(
persdoc3 == 3 ~ "No",
persdoc3 == 1 ~ "Yes only one",
persdoc3 == 2 ~ "More than one"),
levels = c("No", "Yes only one", "More than one")),
medcost = factor(case_when(
medcost1 == 2 ~ "No",
medcost1 == 1 ~ "Yes"),
levels = c("No", "Yes")),
checkup = factor(case_when(
checkup1 == 8 ~ "Never",
checkup1 == 1 ~ "Within past year",
checkup1 == 2 ~ "Within past 2 years",
checkup1 == 3 ~ "Within past 5 years",
checkup1 == 4 ~ "5 or more years ago",
checkup1 == 7 ~ "Don't Know/Not Sure"),
levels = c("Never", "Within past year", "Within past 2 years", "Within Past 5 years", "5 or more years ago", "Don't Know/Not Sure")))
brfss_recode <- brfss_recode |>
mutate(
cervicalscr_fac = relevel(cervicalscr_fac, ref = "Not screened"),
age = relevel(age, ref = "25-29"),
education = relevel(education, ref = "Less than high school"),
race_cat = relevel(race_cat, ref = "White"),
employ1_cat = relevel(employ1_cat, ref = "Employed for wages"),
gen_health = relevel(gen_health, ref = "Excellent"),
income = relevel(income, ref = "Less than $15,000"),
urbstat1 = relevel(urbstat1, ref = "Urban County"),
priminsur = relevel(priminsur, ref = "Employer/Union Insurance"),
persdoc = relevel(persdoc, ref = "No"),
medcost = relevel(medcost, ref = "No"),
checkup = relevel(checkup, ref = "Within past year"))
# Select only the recoded variables
brfss_recode2 <- brfss_recode |>
select(cervicalscr_bin, cervicalscr_fac, age, sex, education, race_cat, employ1_cat, gen_health, income, urbstat1, priminsur, persdoc, medcost, checkup, llcpwt, ststr, psu)
# Count missing values in each column
missing_summary <- data.frame(
Variable = names(brfss_recode2),
Missing_Count = colSums(is.na(brfss_recode2)),
Missing_Percent = round(colSums(is.na(brfss_recode2)) / nrow(brfss_recode2) * 100, 2)
)
# Show variables with the most missing data
print(missing_summary[order(-missing_summary$Missing_Count), ][1:14, ]) |>
kable(
caption = "Variables by Missingness"
) |>
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)## Variable Missing_Count Missing_Percent
## income income 19076 14.88
## cervicalscr_bin cervicalscr_bin 7644 5.96
## cervicalscr_fac cervicalscr_fac 7644 5.96
## checkup checkup 6352 4.95
## urbstat1 urbstat1 5044 3.93
## priminsur priminsur 3494 2.72
## race_cat race_cat 2605 2.03
## employ1_cat employ1_cat 2230 1.74
## persdoc persdoc 924 0.72
## education education 422 0.33
## medcost medcost 400 0.31
## gen_health gen_health 259 0.20
## age age 0 0.00
## sex sex 0 0.00
| Variable | Missing_Count | Missing_Percent | |
|---|---|---|---|
| income | income | 19076 | 14.88 |
| cervicalscr_bin | cervicalscr_bin | 7644 | 5.96 |
| cervicalscr_fac | cervicalscr_fac | 7644 | 5.96 |
| checkup | checkup | 6352 | 4.95 |
| urbstat1 | urbstat1 | 5044 | 3.93 |
| priminsur | priminsur | 3494 | 2.72 |
| race_cat | race_cat | 2605 | 2.03 |
| employ1_cat | employ1_cat | 2230 | 1.74 |
| persdoc | persdoc | 924 | 0.72 |
| education | education | 422 | 0.33 |
| medcost | medcost | 400 | 0.31 |
| gen_health | gen_health | 259 | 0.20 |
| age | age | 0 | 0.00 |
| sex | sex | 0 | 0.00 |
brfss_recode3 <- brfss_recode2 |>
# Filter to complete cases only
drop_na()
set.seed(553) # For reproducibility
# Sample 50000 observations for manageable analysis
brfss_analytic <- brfss_recode3 |>
slice_sample(n = 50000)
# Save for later use
saveRDS(brfss_analytic,
"brfss_final_project_2024.rds")
# Read in the Analytic Dataset
brfss_analytic <- readRDS(
"brfss_final_project_2024.rds"
)
# Create survey design
options(survey.lonely.psu = "adjust")
brfss_design <- svydesign(
ids = ~psu,
strata = ~ststr,
weights = ~llcpwt,
nest = TRUE,
data = brfss_analytic
)
# Display subset dimensions
cat("Working subset dimensions:",
nrow(brfss_analytic), "observations,",
ncol(brfss_analytic), "variables\n")## Working subset dimensions: 50000 observations, 17 variables
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_analytic), ncol(brfss_analytic))) |>
kable(caption = "Analytic Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 50000 |
| Variables | 17 |
##
## Not screened Screened
## White 3196 32399
## Black 875 4047
## American Indian or Alaska Native 149 708
## Asian 329 915
## Native Hawaiian or other Pacific Islander 42 151
## Multiracial 127 1193
## Hispanic 1157 4712
# Create Unweighted Table 1 Descriptive Statistics
brfss_analytic |>
select(cervicalscr_fac, race_cat, age, education, employ1_cat, gen_health, income, urbstat1, priminsur, persdoc, medcost, checkup) |>
tbl_summary(
by = race_cat, # Stratify Table by Race/Ethnicity
type = list(medcost ~ "categorical"),
label = list(
cervicalscr_fac ~ "Ever Had a Cervical Cancer Screening Test",
age ~ "Age Category",
education ~ "Level of Education Completed",
employ1_cat ~ "Current Employment Status",
gen_health ~ "General Health Status",
income ~ "Income Category",
urbstat1 ~ "Urban/Rural Status",
priminsur ~ "Current Health Care Coverage",
persdoc ~ "Personal Health Care Provider",
medcost ~ "Could not afford to see Doctor (past 12 months)",
checkup ~ "Time since last routine checkup"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Table 1. Unweighted Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 50,000)") |>
modify_footnote(
all_stat_cols() ~
"Analytic sample includes 50,000 complete-case observations. Missing values were excluded listwise for all variables included in this table."
) |>
as_flex_table()Characteristic | N | White | Black | American Indian or Alaska Native | Asian | Native Hawaiian or other Pacific Islander | Multiracial | Hispanic |
|---|---|---|---|---|---|---|---|---|
Ever Had a Cervical Cancer Screening Test | 50,000 | |||||||
Not screened | 3,196 (9.0%) | 875 (18%) | 149 (17%) | 329 (26%) | 42 (22%) | 127 (9.6%) | 1,157 (20%) | |
Screened | 32,399 (91%) | 4,047 (82%) | 708 (83%) | 915 (74%) | 151 (78%) | 1,193 (90%) | 4,712 (80%) | |
Age Category | 50,000 | |||||||
25-29 | 2,409 (6.8%) | 385 (7.8%) | 63 (7.4%) | 193 (16%) | 16 (8.3%) | 158 (12%) | 797 (14%) | |
30-34 | 2,926 (8.2%) | 510 (10%) | 67 (7.8%) | 214 (17%) | 29 (15%) | 192 (15%) | 904 (15%) | |
35-39 | 3,760 (11%) | 589 (12%) | 115 (13%) | 190 (15%) | 21 (11%) | 190 (14%) | 891 (15%) | |
40-44 | 4,308 (12%) | 649 (13%) | 107 (12%) | 162 (13%) | 31 (16%) | 192 (15%) | 839 (14%) | |
45-49 | 4,269 (12%) | 640 (13%) | 120 (14%) | 143 (11%) | 20 (10%) | 166 (13%) | 764 (13%) | |
50-54 | 4,844 (14%) | 730 (15%) | 129 (15%) | 139 (11%) | 27 (14%) | 133 (10%) | 660 (11%) | |
55-59 | 5,796 (16%) | 685 (14%) | 132 (15%) | 100 (8.0%) | 23 (12%) | 139 (11%) | 543 (9.3%) | |
60-64 | 7,283 (20%) | 734 (15%) | 124 (14%) | 103 (8.3%) | 26 (13%) | 150 (11%) | 471 (8.0%) | |
Level of Education Completed | 50,000 | |||||||
Less than high school | 885 (2.5%) | 182 (3.7%) | 57 (6.7%) | 15 (1.2%) | 8 (4.1%) | 46 (3.5%) | 1,183 (20%) | |
High school graduate | 6,255 (18%) | 1,090 (22%) | 208 (24%) | 125 (10%) | 70 (36%) | 237 (18%) | 1,468 (25%) | |
Some college | 9,400 (26%) | 1,499 (30%) | 304 (35%) | 186 (15%) | 63 (33%) | 388 (29%) | 1,321 (23%) | |
College graduate | 19,055 (54%) | 2,151 (44%) | 288 (34%) | 918 (74%) | 52 (27%) | 649 (49%) | 1,897 (32%) | |
Current Employment Status | 50,000 | |||||||
Employed for wages | 22,378 (63%) | 3,169 (64%) | 517 (60%) | 862 (69%) | 110 (57%) | 776 (59%) | 3,168 (54%) | |
Self-employed | 3,052 (8.6%) | 287 (5.8%) | 60 (7.0%) | 106 (8.5%) | 17 (8.8%) | 125 (9.5%) | 549 (9.4%) | |
Out of work for 1 year or more | 677 (1.9%) | 183 (3.7%) | 43 (5.0%) | 28 (2.3%) | 7 (3.6%) | 43 (3.3%) | 239 (4.1%) | |
Out of work for less than 1 year | 802 (2.3%) | 198 (4.0%) | 40 (4.7%) | 40 (3.2%) | 11 (5.7%) | 52 (3.9%) | 302 (5.1%) | |
A homemaker | 2,650 (7.4%) | 144 (2.9%) | 49 (5.7%) | 87 (7.0%) | 19 (9.8%) | 106 (8.0%) | 968 (16%) | |
A student | 359 (1.0%) | 121 (2.5%) | 15 (1.8%) | 56 (4.5%) | 3 (1.6%) | 32 (2.4%) | 89 (1.5%) | |
Retired | 2,650 (7.4%) | 259 (5.3%) | 32 (3.7%) | 37 (3.0%) | 9 (4.7%) | 40 (3.0%) | 131 (2.2%) | |
Unable to work | 3,027 (8.5%) | 561 (11%) | 101 (12%) | 28 (2.3%) | 17 (8.8%) | 146 (11%) | 423 (7.2%) | |
General Health Status | 50,000 | |||||||
Excellent | 5,322 (15%) | 580 (12%) | 92 (11%) | 238 (19%) | 32 (17%) | 157 (12%) | 763 (13%) | |
Very Good | 13,212 (37%) | 1,349 (27%) | 221 (26%) | 423 (34%) | 47 (24%) | 416 (32%) | 1,374 (23%) | |
Good | 11,046 (31%) | 1,904 (39%) | 311 (36%) | 452 (36%) | 64 (33%) | 443 (34%) | 2,273 (39%) | |
Fair | 4,452 (13%) | 874 (18%) | 163 (19%) | 107 (8.6%) | 38 (20%) | 222 (17%) | 1,213 (21%) | |
Poor | 1,563 (4.4%) | 215 (4.4%) | 70 (8.2%) | 24 (1.9%) | 12 (6.2%) | 82 (6.2%) | 246 (4.2%) | |
Income Category | 50,000 | |||||||
Less than $15,000 | 1,646 (4.6%) | 463 (9.4%) | 85 (9.9%) | 54 (4.3%) | 18 (9.3%) | 116 (8.8%) | 709 (12%) | |
$15,000 to < $25,000 | 2,181 (6.1%) | 531 (11%) | 109 (13%) | 63 (5.1%) | 25 (13%) | 115 (8.7%) | 990 (17%) | |
$25,000 to < $35,000 | 2,600 (7.3%) | 660 (13%) | 132 (15%) | 105 (8.4%) | 25 (13%) | 124 (9.4%) | 1,008 (17%) | |
$35,000 to < $50,000 | 3,565 (10%) | 773 (16%) | 148 (17%) | 123 (9.9%) | 26 (13%) | 147 (11%) | 864 (15%) | |
$50,000 to < $100,000 | 10,959 (31%) | 1,400 (28%) | 231 (27%) | 325 (26%) | 52 (27%) | 376 (28%) | 1,194 (20%) | |
$100,000 to < $200,000 | 10,822 (30%) | 828 (17%) | 128 (15%) | 320 (26%) | 42 (22%) | 322 (24%) | 821 (14%) | |
More than $200,000 | 3,822 (11%) | 267 (5.4%) | 24 (2.8%) | 254 (20%) | 5 (2.6%) | 120 (9.1%) | 283 (4.8%) | |
Urban/Rural Status | 50,000 | |||||||
Urban County | 30,228 (85%) | 4,646 (94%) | 531 (62%) | 1,216 (98%) | 187 (97%) | 1,168 (88%) | 5,593 (95%) | |
Rural County | 5,367 (15%) | 276 (5.6%) | 326 (38%) | 28 (2.3%) | 6 (3.1%) | 152 (12%) | 276 (4.7%) | |
Current Health Care Coverage | 50,000 | |||||||
Employer/Union Insurance | 21,297 (60%) | 2,319 (47%) | 276 (32%) | 805 (65%) | 91 (47%) | 661 (50%) | 2,062 (35%) | |
Private Nongovernmental Insurance | 4,230 (12%) | 418 (8.5%) | 56 (6.5%) | 134 (11%) | 10 (5.2%) | 119 (9.0%) | 504 (8.6%) | |
Medicare | 2,189 (6.1%) | 445 (9.0%) | 62 (7.2%) | 46 (3.7%) | 18 (9.3%) | 86 (6.5%) | 367 (6.3%) | |
Medigap | 24 (<0.1%) | 2 (<0.1%) | 1 (0.1%) | 0 (0%) | 2 (1.0%) | 0 (0%) | 2 (<0.1%) | |
Medicaid | 3,365 (9.5%) | 972 (20%) | 160 (19%) | 95 (7.6%) | 25 (13%) | 220 (17%) | 984 (17%) | |
Children's Health Insurance Program | 13 (<0.1%) | 3 (<0.1%) | 1 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 3 (<0.1%) | |
Military Related Health Care | 893 (2.5%) | 140 (2.8%) | 21 (2.5%) | 23 (1.8%) | 9 (4.7%) | 40 (3.0%) | 100 (1.7%) | |
Indian Health Service | 11 (<0.1%) | 1 (<0.1%) | 172 (20%) | 1 (<0.1%) | 0 (0%) | 37 (2.8%) | 8 (0.1%) | |
State Sponsored Health Plan | 1,265 (3.6%) | 201 (4.1%) | 27 (3.2%) | 52 (4.2%) | 23 (12%) | 60 (4.5%) | 438 (7.5%) | |
Other Governmental Program | 920 (2.6%) | 174 (3.5%) | 27 (3.2%) | 38 (3.1%) | 8 (4.1%) | 32 (2.4%) | 230 (3.9%) | |
No Coverage of Any Type | 1,388 (3.9%) | 247 (5.0%) | 54 (6.3%) | 50 (4.0%) | 7 (3.6%) | 65 (4.9%) | 1,171 (20%) | |
Personal Health Care Provider | 50,000 | |||||||
No | 2,323 (6.5%) | 380 (7.7%) | 113 (13%) | 166 (13%) | 22 (11%) | 127 (9.6%) | 1,310 (22%) | |
Yes only one | 18,861 (53%) | 2,497 (51%) | 451 (53%) | 689 (55%) | 105 (54%) | 639 (48%) | 2,876 (49%) | |
More than one | 14,411 (40%) | 2,045 (42%) | 293 (34%) | 389 (31%) | 66 (34%) | 554 (42%) | 1,683 (29%) | |
Could not afford to see Doctor (past 12 months) | 50,000 | |||||||
No | 31,548 (89%) | 4,170 (85%) | 717 (84%) | 1,117 (90%) | 159 (82%) | 1,086 (82%) | 4,539 (77%) | |
Yes | 4,047 (11%) | 752 (15%) | 140 (16%) | 127 (10%) | 34 (18%) | 234 (18%) | 1,330 (23%) | |
Time since last routine checkup | 50,000 | |||||||
Within past year | 30,309 (85%) | 4,411 (90%) | 719 (84%) | 1,030 (83%) | 164 (85%) | 1,074 (81%) | 4,736 (81%) | |
Never | 81 (0.2%) | 4 (<0.1%) | 1 (0.1%) | 10 (0.8%) | 1 (0.5%) | 1 (<0.1%) | 78 (1.3%) | |
Within past 2 years | 3,708 (10%) | 422 (8.6%) | 103 (12%) | 169 (14%) | 21 (11%) | 175 (13%) | 836 (14%) | |
Within Past 5 years | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | |
5 or more years ago | 1,295 (3.6%) | 54 (1.1%) | 30 (3.5%) | 28 (2.3%) | 6 (3.1%) | 61 (4.6%) | 170 (2.9%) | |
Don't Know/Not Sure | 202 (0.6%) | 31 (0.6%) | 4 (0.5%) | 7 (0.6%) | 1 (0.5%) | 9 (0.7%) | 49 (0.8%) | |
1Analytic sample includes 50,000 complete-case observations. Missing values were excluded listwise for all variables included in this table. | ||||||||
# Table of Cervical Screening Distribution
brfss_analytic |>
select(cervicalscr_fac) |>
tbl_summary(
label = list(
cervicalscr_fac ~ "Every Had a Cervical Cancer Screening"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Cervical Cancer Screening Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Every Had a Cervical Cancer Screening | 50,000 | |
| Â Â Â Â Not screened | 5,875 (12%) | |
| Â Â Â Â Screened | 44,125 (88%) | |
| 1 n (%) | ||
# Table of Race Distribution
brfss_analytic |>
select(race_cat) |>
tbl_summary(
label = list(
race_cat ~ "Race Category"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Race Category Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Race Category | 50,000 | |
| Â Â Â Â White | 35,595 (71%) | |
| Â Â Â Â Black | 4,922 (9.8%) | |
| Â Â Â Â American Indian or Alaska Native | 857 (1.7%) | |
| Â Â Â Â Asian | 1,244 (2.5%) | |
| Â Â Â Â Native Hawaiian or other Pacific Islander | 193 (0.4%) | |
| Â Â Â Â Multiracial | 1,320 (2.6%) | |
| Â Â Â Â Hispanic | 5,869 (12%) | |
| 1 n (%) | ||
# Table of Age Distribution
brfss_analytic |>
select(age) |>
tbl_summary(
label = list(
age ~ "Age Category"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Age Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Age Category | 50,000 | |
| Â Â Â Â 25-29 | 4,021 (8.0%) | |
| Â Â Â Â 30-34 | 4,842 (9.7%) | |
| Â Â Â Â 35-39 | 5,756 (12%) | |
| Â Â Â Â 40-44 | 6,288 (13%) | |
| Â Â Â Â 45-49 | 6,122 (12%) | |
| Â Â Â Â 50-54 | 6,662 (13%) | |
| Â Â Â Â 55-59 | 7,418 (15%) | |
| Â Â Â Â 60-64 | 8,891 (18%) | |
| 1 n (%) | ||
# Table of Level of Education Completed Distribution
brfss_analytic |>
select(education) |>
tbl_summary(
label = list(
education ~ "Level of Education Completed"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Level of Education Completed Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Level of Education Completed | 50,000 | |
| Â Â Â Â Less than high school | 2,376 (4.8%) | |
| Â Â Â Â High school graduate | 9,453 (19%) | |
| Â Â Â Â Some college | 13,161 (26%) | |
| Â Â Â Â College graduate | 25,010 (50%) | |
| 1 n (%) | ||
# Table of Current Employment Status Distribution
brfss_analytic |>
select(employ1_cat) |>
tbl_summary(
label = list(
employ1_cat ~ "Current Employment Status"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Current Employment Status Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Current Employment Status | 50,000 | |
| Â Â Â Â Employed for wages | 30,980 (62%) | |
| Â Â Â Â Self-employed | 4,196 (8.4%) | |
| Â Â Â Â Out of work for 1 year or more | 1,220 (2.4%) | |
| Â Â Â Â Out of work for less than 1 year | 1,445 (2.9%) | |
| Â Â Â Â A homemaker | 4,023 (8.0%) | |
| Â Â Â Â A student | 675 (1.4%) | |
| Â Â Â Â Retired | 3,158 (6.3%) | |
| Â Â Â Â Unable to work | 4,303 (8.6%) | |
| 1 n (%) | ||
# Table of General Health Status Distribution
brfss_analytic |>
select(gen_health) |>
tbl_summary(
label = list(
gen_health ~ "General Health Status"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted General Health Status Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| General Health Status | 50,000 | |
| Â Â Â Â Excellent | 7,184 (14%) | |
| Â Â Â Â Very Good | 17,042 (34%) | |
| Â Â Â Â Good | 16,493 (33%) | |
| Â Â Â Â Fair | 7,069 (14%) | |
| Â Â Â Â Poor | 2,212 (4.4%) | |
| 1 n (%) | ||
# Tabke of Income Category Distribution
brfss_analytic |>
select(income) |>
tbl_summary(
label = list(
income ~ "Income Category"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Income Category Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Income Category | 50,000 | |
| Â Â Â Â Less than $15,000 | 3,091 (6.2%) | |
| Â Â Â Â $15,000 to < $25,000 | 4,014 (8.0%) | |
| Â Â Â Â $25,000 to < $35,000 | 4,654 (9.3%) | |
| Â Â Â Â $35,000 to < $50,000 | 5,646 (11%) | |
| Â Â Â Â $50,000 to < $100,000 | 14,537 (29%) | |
| Â Â Â Â $100,000 to < $200,000 | 13,283 (27%) | |
| Â Â Â Â More than $200,000 | 4,775 (9.6%) | |
| 1 n (%) | ||
# Table of Ubran/Rural Status Distribution
brfss_analytic |>
select(urbstat1) |>
tbl_summary(
label = list(
urbstat1 ~ "Urban/Rural Status"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Urban Rural Status Distribution and Percentages") | Characteristic | N | N = 50,0001 |
|---|---|---|
| Urban/Rural Status | 50,000 | |
| Â Â Â Â Urban County | 43,569 (87%) | |
| Â Â Â Â Rural County | 6,431 (13%) | |
| 1 n (%) | ||
# Table of Current Health Care Coverage Distribution
brfss_analytic |>
select(priminsur) |>
tbl_summary(
label = list(
priminsur ~ "Current Health Care Coverage"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Current Health Care Coverage Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Current Health Care Coverage | 50,000 | |
| Â Â Â Â Employer/Union Insurance | 27,511 (55%) | |
| Â Â Â Â Private Nongovernmental Insurance | 5,471 (11%) | |
| Â Â Â Â Medicare | 3,213 (6.4%) | |
| Â Â Â Â Medigap | 31 (<0.1%) | |
| Â Â Â Â Medicaid | 5,821 (12%) | |
| Â Â Â Â Children's Health Insurance Program | 20 (<0.1%) | |
| Â Â Â Â Military Related Health Care | 1,226 (2.5%) | |
| Â Â Â Â Indian Health Service | 230 (0.5%) | |
| Â Â Â Â State Sponsored Health Plan | 2,066 (4.1%) | |
| Â Â Â Â Other Governmental Program | 1,429 (2.9%) | |
| Â Â Â Â No Coverage of Any Type | 2,982 (6.0%) | |
| 1 n (%) | ||
# Table of Personal Health Care Provider Distribution
brfss_analytic |>
select(persdoc) |>
tbl_summary(
label = list(
persdoc ~ "Personal Health Care Provider"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Personal Health Care Provider Distribution and Percentages") | Characteristic | N | N = 50,0001 |
|---|---|---|
| Personal Health Care Provider | 50,000 | |
| Â Â Â Â No | 4,441 (8.9%) | |
| Â Â Â Â Yes only one | 26,118 (52%) | |
| Â Â Â Â More than one | 19,441 (39%) | |
| 1 n (%) | ||
# Table of Could not afford to see Doctor Distribution
brfss_analytic |>
select(medcost) |>
tbl_summary(
type = list(medcost ~ "categorical"),
label = list(
medcost ~ "Could not afford to see Doctor (past 12 months)"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Could not afford to see a Doctor Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Could not afford to see Doctor (past 12 months) | 50,000 | |
| Â Â Â Â No | 43,336 (87%) | |
| Â Â Â Â Yes | 6,664 (13%) | |
| 1 n (%) | ||
# Table of Time since last routine checkup Distribution
brfss_analytic |>
select(checkup) |>
tbl_summary(
label = list(
checkup ~ "Time since last routine checkup"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) |>
add_n() |>
bold_labels() |>
modify_caption("Unweighted Time since last routine checkup Distribution and Percentages")| Characteristic | N | N = 50,0001 |
|---|---|---|
| Time since last routine checkup | 50,000 | |
| Â Â Â Â Within past year | 42,443 (85%) | |
| Â Â Â Â Never | 176 (0.4%) | |
| Â Â Â Â Within past 2 years | 5,434 (11%) | |
| Â Â Â Â Within Past 5 years | 0 (0%) | |
| Â Â Â Â 5 or more years ago | 1,644 (3.3%) | |
| Â Â Â Â Don't Know/Not Sure | 303 (0.6%) | |
| 1 n (%) | ||
# Create a bar plot for cervical cancer screening count
ggplot(brfss_analytic, aes(x = cervicalscr_fac)) +
geom_bar() +
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
labs(title = "Bar Graph of Cervical Cancer Screening with Counts",
x = "Cervical Cancer Screening",
y = "Count") +
theme_minimal()# Create bar plot demonstrating the proportion of cervical cancer screening by race/ethnicity
brfss_analytic |>
group_by(race_cat) |>
summarize(prop_screened = mean(cervicalscr_fac == "Screened")) |>
ggplot(aes(x = race_cat, y = prop_screened)) +
geom_col(fill = "steelblue") +
labs(
x = "Race/Ethnicity",
y = "Proportion Screened",
title = "Proportion of Cervical Cancer Screening by Race/Ethnicity"
) +
theme_minimal()# Create split bar plot for race/ethnicity
ggplot(brfss_analytic, aes(x = race_cat, fill = cervicalscr_fac)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Race/Ethnicity",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Race/Ethnicity"
) +
theme_minimal()# Create race levels
race_levels <- unique(brfss_analytic$race_cat)
# Create table with race levels
tbl <- map_df(race_levels, function(r) {
# subset design for this race group
d_sub <- subset(brfss_design, race_cat == r)
# compute survey-weighted proportion + CI
est <- svyciprop(~I(cervicalscr_fac == "Screened"),
design = d_sub,
method = "logit")
# return a clean row
tibble(
race_cat = r,
prop = as.numeric(coef(est)),
lower = as.numeric(confint(est)[1]),
upper = as.numeric(confint(est)[2])
)
})
# Create the plot with confidence intervals
ggplot(tbl, aes(x = race_cat, y = prop)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2) +
labs(
x = "Race/Ethnicity",
y = "Weighted Proportion Screened",
title = "Survey-Weighted Screening Proportions with 95% Confidence Intervals"
) +
theme_minimal()# Create split bar plot for education levels
ggplot(brfss_analytic, aes(x = education, fill = cervicalscr_fac)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Level of Education",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Level of Education"
) +
theme_minimal()# Create split bar plot for employment status
ggplot(brfss_analytic, aes(x = employ1_cat, fill = cervicalscr_fac)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Employment Status",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Employment Status"
) +
theme_minimal()# Create split bar plot for personal doctor status
ggplot(brfss_analytic, aes(x = persdoc, fill = cervicalscr_fac)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Personal Doctor Status",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Personal Doctor Status"
) +
theme_minimal()# Create split bar plot for age categories
ggplot(brfss_analytic, aes(x = age, fill = cervicalscr_fac)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Age Category (Years)",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Age Category"
) +
theme_minimal()# Create model limited that only includes exposure and outcome
model_limited <- svyglm(cervicalscr_bin ~ race_cat, data = brfss_analytic, design = brfss_design, family = "quasibinomial")
# Create a nice table
tidy(model_limited, conf.int = FALSE) |>
mutate(
term = dplyr::recode(term,
"(Intercept)" = "Intercept",
"race_cat" = "Racial Identity"),
across(where(is.numeric), ~ round(., 4))
) |>
kable(
caption = "Survey Weighted Quasibionomial Regression: Cervical Cancer Screening by Race Category (BRFSS 2024, n = 50,000)",
col.names = c("Term", "Estimate", "Std. Error", "t-statistic", "p-value"),
format = "html"
) |>
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = TRUE) |>
row_spec(0, bold = TRUE)| Term | Estimate | Std. Error | t-statistic | p-value |
|---|---|---|---|---|
| Intercept | 2.2950 | 0.0358 | 64.1482 | 0.0000 |
| race_catBlack | -0.7992 | 0.0870 | -9.1877 | 0.0000 |
| race_catAmerican Indian or Alaska Native | -1.1544 | 0.2344 | -4.9252 | 0.0000 |
| race_catAsian | -1.1270 | 0.1429 | -7.8873 | 0.0000 |
| race_catNative Hawaiian or other Pacific Islander | -0.5332 | 0.2697 | -1.9772 | 0.0480 |
| race_catMultiracial | -0.0538 | 0.1636 | -0.3287 | 0.7424 |
| race_catHispanic | -0.8943 | 0.0802 | -11.1502 | 0.0000 |
# Create model full that includes exposure and outcome and all covariates
model <- svyglm(cervicalscr_bin ~ race_cat + age + education + employ1_cat + gen_health + income + urbstat1 + priminsur + persdoc + medcost + checkup, data = brfss_analytic, design = brfss_design, family = "quasibinomial")
# Create a nice table
tidy(model, conf.int = FALSE) |>
mutate(
term = dplyr::recode(term,
"(Intercept)" = "Intercept",
"race_cat" = "Racial Identity",
"age" = "Age Category",
"education" = "Level of Education Completed",
"employ1_cat" = "Current Employment Status",
"gen_health" = "General Health Status",
"income" = "Income Category",
"urbstat1" = "Urban/Rural Status",
"priminsur" = "Current Health Care Coverage",
"persdoc" = "Personal Health Care Provider",
"medcost" = "Could not afford to see Doctor (past 12 months)",
"checkup" = "Time since last routine checkup"
),
across(where(is.numeric), ~ round(., 4))
) |>
kable(
caption = "Survey Weighted Quasibinomial Regression: Cervical Cancer Screening and all other covariates (BRFSS 2024, n = 50,000)",
col.names = c("Term", "Estimate", "Std. Error", "t-statistic", "p-value"),
format = "html"
) |>
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = TRUE) |>
row_spec(0, bold = TRUE)| Term | Estimate | Std. Error | t-statistic | p-value |
|---|---|---|---|---|
| Intercept | 0.0548 | 0.2245 | 0.2439 | 0.8073 |
| race_catBlack | -0.6425 | 0.0942 | -6.8226 | 0.0000 |
| race_catAmerican Indian or Alaska Native | -0.8781 | 0.3047 | -2.8816 | 0.0040 |
| race_catAsian | -1.3482 | 0.1399 | -9.6334 | 0.0000 |
| race_catNative Hawaiian or other Pacific Islander | -0.3514 | 0.2874 | -1.2227 | 0.2214 |
| race_catMultiracial | 0.1172 | 0.1787 | 0.6559 | 0.5119 |
| race_catHispanic | -0.2689 | 0.0843 | -3.1907 | 0.0014 |
| age30-34 | 0.5051 | 0.1214 | 4.1588 | 0.0000 |
| age35-39 | 0.7203 | 0.1205 | 5.9787 | 0.0000 |
| age40-44 | 0.6609 | 0.1244 | 5.3113 | 0.0000 |
| age45-49 | 0.8489 | 0.1411 | 6.0168 | 0.0000 |
| age50-54 | 0.6301 | 0.1302 | 4.8402 | 0.0000 |
| age55-59 | 0.5513 | 0.1201 | 4.5896 | 0.0000 |
| age60-64 | 0.5678 | 0.1184 | 4.7967 | 0.0000 |
| educationHigh school graduate | 0.5656 | 0.1145 | 4.9411 | 0.0000 |
| educationSome college | 0.8940 | 0.1140 | 7.8446 | 0.0000 |
| educationCollege graduate | 1.1093 | 0.1254 | 8.8459 | 0.0000 |
| employ1_catSelf-employed | -0.2505 | 0.1459 | -1.7176 | 0.0859 |
| employ1_catOut of work for 1 year or more | -0.2821 | 0.1485 | -1.8996 | 0.0575 |
| employ1_catOut of work for less than 1 year | 0.1284 | 0.1813 | 0.7082 | 0.4788 |
| employ1_catA homemaker | 0.1152 | 0.1201 | 0.9597 | 0.3372 |
| employ1_catA student | 0.1192 | 0.2109 | 0.5655 | 0.5717 |
| employ1_catRetired | -0.2669 | 0.1492 | -1.7891 | 0.0736 |
| employ1_catUnable to work | 0.0114 | 0.1215 | 0.0936 | 0.9254 |
| gen_healthVery Good | 0.3111 | 0.1060 | 2.9362 | 0.0033 |
| gen_healthGood | -0.0273 | 0.1071 | -0.2548 | 0.7989 |
| gen_healthFair | 0.1099 | 0.1172 | 0.9371 | 0.3487 |
| gen_healthPoor | -0.0134 | 0.1585 | -0.0845 | 0.9327 |
| income$15,000 to < $25,000 | 0.1702 | 0.1334 | 1.2754 | 0.2022 |
| income$25,000 to < $35,000 | 0.1551 | 0.1262 | 1.2293 | 0.2190 |
| income$35,000 to < $50,000 | 0.1792 | 0.1316 | 1.3620 | 0.1732 |
| income$50,000 to < $100,000 | 0.4920 | 0.1326 | 3.7102 | 0.0002 |
| income$100,000 to < $200,000 | 0.8430 | 0.1516 | 5.5606 | 0.0000 |
| incomeMore than $200,000 | 1.2860 | 0.2046 | 6.2850 | 0.0000 |
| urbstat1Rural County | 0.0061 | 0.0898 | 0.0679 | 0.9459 |
| priminsurPrivate Nongovernmental Insurance | -0.0942 | 0.1268 | -0.7431 | 0.4574 |
| priminsurMedicare | -0.2645 | 0.1310 | -2.0198 | 0.0434 |
| priminsurMedigap | -0.4408 | 0.7095 | -0.6213 | 0.5344 |
| priminsurMedicaid | -0.0206 | 0.1186 | -0.1740 | 0.8619 |
| priminsurChildren’s Health Insurance Program | 1.3137 | 1.5271 | 0.8602 | 0.3897 |
| priminsurMilitary Related Health Care | -0.1448 | 0.2268 | -0.6382 | 0.5233 |
| priminsurIndian Health Service | -0.0419 | 0.4286 | -0.0978 | 0.9221 |
| priminsurState Sponsored Health Plan | 0.1611 | 0.1483 | 1.0868 | 0.2771 |
| priminsurOther Governmental Program | -0.1239 | 0.1697 | -0.7300 | 0.4654 |
| priminsurNo Coverage of Any Type | -0.1517 | 0.1409 | -1.0763 | 0.2818 |
| persdocYes only one | 0.3172 | 0.0984 | 3.2242 | 0.0013 |
| persdocMore than one | 0.4993 | 0.1102 | 4.5304 | 0.0000 |
| medcostYes | 0.0386 | 0.0844 | 0.4569 | 0.6477 |
| checkupNever | -0.7603 | 0.2753 | -2.7617 | 0.0058 |
| checkupWithin past 2 years | -0.1598 | 0.0935 | -1.7079 | 0.0877 |
| checkup5 or more years ago | -0.4645 | 0.1458 | -3.1858 | 0.0014 |
| checkupDon’t Know/Not Sure | -1.4106 | 0.3703 | -3.8094 | 0.0001 |
# Extract estimates and make OR and 95% CI
tidy_ci_lim <- tidy(model_limited, conf.int = FALSE, exponentiate = TRUE) |>
mutate(
conf.low = exp(estimate - 1.96 * std.error),
conf.high = exp(estimate + 1.96 * std.error)
)
# Recode it and put it into a tidy table
tidy_table <- tidy_ci_lim |>
mutate(
term = dplyr::recode(term,
"(Intercept)" = "Intercept",
"race_cat" = "Racial Identity"
),
estimate = round(estimate, 4),
conf.low = round(conf.low, 4),
conf.high = round(conf.high, 4),
p.value = round(p.value, 4)
) |>
select(
term,
estimate,
conf.low,
conf.high,
p.value
)
tidy_table |>
kable(
caption = "Table 2. Survey-Weighted Quasibnomial Regression Results: Cervical Cancer Screening by Race Category (BRFSS 2024, n = 50,000)",
col.names = c("Term", "Odds Ratio", "95% CI Lower", "95% CI Upper", "p-value"),
format = "html"
) |>
kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = TRUE
) |>
row_spec(0, bold = TRUE)| Term | Odds Ratio | 95% CI Lower | 95% CI Upper | p-value |
|---|---|---|---|---|
| Intercept | 9.9245 | 19040.6602 | 21907.3190 | 0.0000 |
| race_catBlack | 0.4497 | 1.3221 | 1.8593 | 0.0000 |
| race_catAmerican Indian or Alaska Native | 0.3153 | 0.8658 | 2.1698 | 0.0000 |
| race_catAsian | 0.3240 | 1.0449 | 1.8295 | 0.0000 |
| race_catNative Hawaiian or other Pacific Islander | 0.5868 | 1.0600 | 3.0504 | 0.0480 |
| race_catMultiracial | 0.9477 | 1.8721 | 3.5546 | 0.7424 |
| race_catHispanic | 0.4089 | 1.2862 | 1.7614 | 0.0000 |
# Create a publication ready table
model_limited |>
tbl_regression(
exponentiate = TRUE,
label = list(race_cat ~ "Racial Identity")
) |>
modify_caption(
"**Table 2. Survey-Weighted Quasibinomial Regression Results: Cervical Cancer Screening by Race Category (BRFSS 2024, n = 50,000)**"
) |>
bold_labels() |>
bold_p()| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| Racial Identity | |||
|     White | — | — | |
| Â Â Â Â Black | 0.45 | 0.38, 0.53 | <0.001 |
| Â Â Â Â American Indian or Alaska Native | 0.32 | 0.20, 0.50 | <0.001 |
| Â Â Â Â Asian | 0.32 | 0.24, 0.43 | <0.001 |
| Â Â Â Â Native Hawaiian or other Pacific Islander | 0.59 | 0.35, 1.00 | 0.048 |
| Â Â Â Â Multiracial | 0.95 | 0.69, 1.31 | 0.7 |
| Â Â Â Â Hispanic | 0.41 | 0.35, 0.48 | <0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# Extract estimates and make OR and 95% CI
tidy_ci <- tidy(model, conf.int = FALSE, exponentiate = TRUE) |>
mutate(
conf.low = exp(estimate - 1.96 * std.error),
conf.high = exp(estimate + 1.96 * std.error)
)
# Recode it and put it into a tidy table
tidy_table <- tidy_ci |>
mutate(
term = dplyr::recode(term,
"(Intercept)" = "Intercept",
"race_cat" = "Racial Identity",
"age" = "Age Category",
"education" = "Level of Education Completed",
"employ1_cat" = "Current Employment Status",
"gen_health" = "General Health Status",
"income" = "Income Category",
"urbstat1" = "Urban/Rural Status",
"priminsur" = "Current Health Care Coverage",
"persdoc" = "Personal Health Care Provider",
"medcost" = "Could not afford to see Doctor (past 12 months)",
"checkup" = "Time since last routine checkup"
),
estimate = round(estimate, 4),
conf.low = round(conf.low, 4),
conf.high = round(conf.high, 4),
p.value = round(p.value, 4)
) |>
select(
term,
estimate,
conf.low,
conf.high,
p.value
)
tidy_table |>
kable(
caption = "Table 3. Survey-Weighted Quasibinomial Regression Results: Cervical Cancer Screening and all other covariates (BRFSS 2024, n = 50,000)",
col.names = c("Term", "Odds Ratio", "95% CI Lower", "95% CI Upper", "p-value"),
format = "html"
) |>
kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = TRUE
) |>
row_spec(0, bold = TRUE)| Term | Odds Ratio | 95% CI Lower | 95% CI Upper | p-value |
|---|---|---|---|---|
| Intercept | 1.0563 | 1.8519 | 4.4654 | 0.8073 |
| race_catBlack | 0.5260 | 1.4069 | 2.0351 | 0.0000 |
| race_catAmerican Indian or Alaska Native | 0.4156 | 0.8339 | 2.7534 | 0.0040 |
| race_catAsian | 0.2597 | 0.9855 | 1.7058 | 0.0000 |
| race_catNative Hawaiian or other Pacific Islander | 0.7037 | 1.1508 | 3.5501 | 0.2214 |
| race_catMultiracial | 1.1243 | 2.1687 | 4.3689 | 0.5119 |
| race_catHispanic | 0.7642 | 1.8204 | 2.5330 | 0.0014 |
| age30-34 | 1.6571 | 4.1332 | 6.6532 | 0.0000 |
| age35-39 | 2.0551 | 6.1656 | 9.8877 | 0.0000 |
| age40-44 | 1.9366 | 5.4341 | 8.8507 | 0.0000 |
| age45-49 | 2.3371 | 7.8505 | 13.6489 | 0.0000 |
| age50-54 | 1.8778 | 5.0667 | 8.4403 | 0.0000 |
| age55-59 | 1.7355 | 4.4818 | 7.1770 | 0.0000 |
| age60-64 | 1.7643 | 4.6289 | 7.3618 | 0.0000 |
| educationHigh school graduate | 1.7604 | 4.6464 | 7.2774 | 0.0000 |
| educationSome college | 2.4449 | 9.2218 | 14.4157 | 0.0000 |
| educationCollege graduate | 3.0323 | 16.2240 | 26.5248 | 0.0000 |
| employ1_catSelf-employed | 0.7784 | 1.6365 | 2.8987 | 0.0859 |
| employ1_catOut of work for 1 year or more | 0.7542 | 1.5890 | 2.8442 | 0.0575 |
| employ1_catOut of work for less than 1 year | 1.1370 | 2.1850 | 4.4481 | 0.4788 |
| employ1_catA homemaker | 1.1221 | 2.4273 | 3.8864 | 0.3372 |
| employ1_catA student | 1.1266 | 2.0408 | 4.6641 | 0.5717 |
| employ1_catRetired | 0.7658 | 1.6054 | 2.8810 | 0.0736 |
| employ1_catUnable to work | 1.0114 | 2.1669 | 3.4888 | 0.9254 |
| gen_healthVery Good | 1.3650 | 3.1812 | 4.8193 | 0.0033 |
| gen_healthGood | 0.9731 | 2.1451 | 3.2641 | 0.7989 |
| gen_healthFair | 1.1161 | 2.4262 | 3.8417 | 0.3487 |
| gen_healthPoor | 0.9867 | 1.9663 | 3.6593 | 0.9327 |
| income$15,000 to < $25,000 | 1.1855 | 2.5193 | 4.2506 | 0.2022 |
| income$25,000 to < $35,000 | 1.1678 | 2.5105 | 4.1169 | 0.2190 |
| income$35,000 to < $50,000 | 1.1963 | 2.5558 | 4.2809 | 0.1732 |
| income$50,000 to < $100,000 | 1.6355 | 3.9576 | 6.6554 | 0.0002 |
| income$100,000 to < $200,000 | 2.3233 | 7.5849 | 13.7415 | 0.0000 |
| incomeMore than $200,000 | 3.6183 | 24.9585 | 55.6617 | 0.0000 |
| urbstat1Rural County | 1.0061 | 2.2934 | 3.2616 | 0.9459 |
| priminsurPrivate Nongovernmental Insurance | 0.9101 | 1.9378 | 3.1855 | 0.4574 |
| priminsurMedicare | 0.7676 | 1.6667 | 2.7850 | 0.0434 |
| priminsurMedigap | 0.6435 | 0.4738 | 7.6453 | 0.5344 |
| priminsurMedicaid | 0.9796 | 2.1110 | 3.3603 | 0.8619 |
| priminsurChildren’s Health Insurance Program | 3.7199 | 2.0682 | 823.0803 | 0.3897 |
| priminsurMilitary Related Health Care | 0.8652 | 1.5230 | 3.7054 | 0.5233 |
| priminsurIndian Health Service | 0.9589 | 1.1261 | 6.0442 | 0.9221 |
| priminsurState Sponsored Health Plan | 1.1748 | 2.4212 | 4.3293 | 0.2771 |
| priminsurOther Governmental Program | 0.8835 | 1.7348 | 3.3740 | 0.4654 |
| priminsurNo Coverage of Any Type | 0.8593 | 1.7915 | 3.1127 | 0.2818 |
| persdocYes only one | 1.3733 | 3.2558 | 4.7880 | 0.0013 |
| persdocMore than one | 1.6475 | 4.1850 | 6.4464 | 0.0000 |
| medcostYes | 1.0393 | 2.3963 | 3.3357 | 0.6477 |
| checkupNever | 0.4675 | 0.9305 | 2.7377 | 0.0058 |
| checkupWithin past 2 years | 0.8523 | 1.9523 | 2.8171 | 0.0877 |
| checkup5 or more years ago | 0.6285 | 1.4088 | 2.4948 | 0.0014 |
| checkupDon’t Know/Not Sure | 0.2440 | 0.6177 | 2.6373 | 0.0001 |
# Create a publication ready table
model |>
tbl_regression(
exponentiate = TRUE,
label = list(
race_cat ~ "Racial Identity",
age ~ "Age Category",
education ~ "Level of Education Completed",
employ1_cat ~ "Current Employment Status",
gen_health ~ "General Health Status",
income ~ "Income Category",
urbstat1 ~ "Urban/Rural Status",
priminsur ~ "Current Health Care Coverage",
persdoc ~ "Personal Health Care Provider",
medcost ~ "Could not afford to see Doctor (past 12 months)",
checkup ~ "Time since last routine checkup"
)
) |>
modify_caption(
"**Table 3. Survey-Weighted Quasibinomial Regression Results: Cervical Cancer Screening and All Covariates (BRFSS 2024, n = 50,000)**"
) |>
bold_labels() |>
bold_p()| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| Racial Identity | |||
|     White | — | — | |
| Â Â Â Â Black | 0.53 | 0.44, 0.63 | <0.001 |
| Â Â Â Â American Indian or Alaska Native | 0.42 | 0.23, 0.76 | 0.004 |
| Â Â Â Â Asian | 0.26 | 0.20, 0.34 | <0.001 |
| Â Â Â Â Native Hawaiian or other Pacific Islander | 0.70 | 0.40, 1.24 | 0.2 |
| Â Â Â Â Multiracial | 1.12 | 0.79, 1.60 | 0.5 |
| Â Â Â Â Hispanic | 0.76 | 0.65, 0.90 | 0.001 |
| Age Category | |||
|     25-29 | — | — | |
| Â Â Â Â 30-34 | 1.66 | 1.31, 2.10 | <0.001 |
| Â Â Â Â 35-39 | 2.06 | 1.62, 2.60 | <0.001 |
| Â Â Â Â 40-44 | 1.94 | 1.52, 2.47 | <0.001 |
| Â Â Â Â 45-49 | 2.34 | 1.77, 3.08 | <0.001 |
| Â Â Â Â 50-54 | 1.88 | 1.45, 2.42 | <0.001 |
| Â Â Â Â 55-59 | 1.74 | 1.37, 2.20 | <0.001 |
| Â Â Â Â 60-64 | 1.76 | 1.40, 2.23 | <0.001 |
| Level of Education Completed | |||
|     Less than high school | — | — | |
| Â Â Â Â High school graduate | 1.76 | 1.41, 2.20 | <0.001 |
| Â Â Â Â Some college | 2.44 | 1.96, 3.06 | <0.001 |
| Â Â Â Â College graduate | 3.03 | 2.37, 3.88 | <0.001 |
| Current Employment Status | |||
|     Employed for wages | — | — | |
| Â Â Â Â Self-employed | 0.78 | 0.58, 1.04 | 0.086 |
| Â Â Â Â Out of work for 1 year or more | 0.75 | 0.56, 1.01 | 0.057 |
| Â Â Â Â Out of work for less than 1 year | 1.14 | 0.80, 1.62 | 0.5 |
| Â Â Â Â A homemaker | 1.12 | 0.89, 1.42 | 0.3 |
| Â Â Â Â A student | 1.13 | 0.75, 1.70 | 0.6 |
| Â Â Â Â Retired | 0.77 | 0.57, 1.03 | 0.074 |
| Â Â Â Â Unable to work | 1.01 | 0.80, 1.28 | >0.9 |
| General Health Status | |||
|     Excellent | — | — | |
| Â Â Â Â Very Good | 1.36 | 1.11, 1.68 | 0.003 |
| Â Â Â Â Good | 0.97 | 0.79, 1.20 | 0.8 |
| Â Â Â Â Fair | 1.12 | 0.89, 1.40 | 0.3 |
| Â Â Â Â Poor | 0.99 | 0.72, 1.35 | >0.9 |
| Income Category | |||
|     Less than $15,000 | — | — | |
| Â Â Â Â $15,000 to < $25,000 | 1.19 | 0.91, 1.54 | 0.2 |
| Â Â Â Â $25,000 to < $35,000 | 1.17 | 0.91, 1.50 | 0.2 |
| Â Â Â Â $35,000 to < $50,000 | 1.20 | 0.92, 1.55 | 0.2 |
| Â Â Â Â $50,000 to < $100,000 | 1.64 | 1.26, 2.12 | <0.001 |
| Â Â Â Â $100,000 to < $200,000 | 2.32 | 1.73, 3.13 | <0.001 |
| Â Â Â Â More than $200,000 | 3.62 | 2.42, 5.40 | <0.001 |
| Urban/Rural Status | |||
|     Urban County | — | — | |
| Â Â Â Â Rural County | 1.01 | 0.84, 1.20 | >0.9 |
| Current Health Care Coverage | |||
|     Employer/Union Insurance | — | — | |
| Â Â Â Â Private Nongovernmental Insurance | 0.91 | 0.71, 1.17 | 0.5 |
| Â Â Â Â Medicare | 0.77 | 0.59, 0.99 | 0.043 |
| Â Â Â Â Medigap | 0.64 | 0.16, 2.59 | 0.5 |
| Â Â Â Â Medicaid | 0.98 | 0.78, 1.24 | 0.9 |
| Â Â Â Â Children's Health Insurance Program | 3.72 | 0.19, 74.2 | 0.4 |
| Â Â Â Â Military Related Health Care | 0.87 | 0.55, 1.35 | 0.5 |
| Â Â Â Â Indian Health Service | 0.96 | 0.41, 2.22 | >0.9 |
| Â Â Â Â State Sponsored Health Plan | 1.17 | 0.88, 1.57 | 0.3 |
| Â Â Â Â Other Governmental Program | 0.88 | 0.63, 1.23 | 0.5 |
| Â Â Â Â No Coverage of Any Type | 0.86 | 0.65, 1.13 | 0.3 |
| Personal Health Care Provider | |||
|     No | — | — | |
| Â Â Â Â Yes only one | 1.37 | 1.13, 1.67 | 0.001 |
| Â Â Â Â More than one | 1.65 | 1.33, 2.04 | <0.001 |
| Could not afford to see Doctor (past 12 months) | |||
|     No | — | — | |
| Â Â Â Â Yes | 1.04 | 0.88, 1.23 | 0.6 |
| Time since last routine checkup | |||
|     Within past year | — | — | |
| Â Â Â Â Never | 0.47 | 0.27, 0.80 | 0.006 |
| Â Â Â Â Within past 2 years | 0.85 | 0.71, 1.02 | 0.088 |
| Â Â Â Â 5 or more years ago | 0.63 | 0.47, 0.84 | 0.001 |
| Â Â Â Â Don't Know/Not Sure | 0.24 | 0.12, 0.50 | <0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# Extract all race coefficients (excluding intercept)
race_coefs0 <- coef(model_limited)[grep("^race_cat", names(coef(model_limited)))]
# Select covariates to test
covars <- c("age", "education", "employ1_cat", "gen_health",
"income", "urbstat1", "priminsur", "persdoc", "medcost", "checkup")
# Create a loop to test each covariate
results_or <- map_dfr(covars, function(v) {
f <- as.formula(paste("cervicalscr_bin ~ race_cat +", v))
m <- svyglm(f, design = brfss_design, family = quasibinomial())
race_adj <- coef(m)[grep("^race_cat", names(coef(m)))]
race_unadj <- race_coefs0[names(race_adj)]
OR_unadj <- exp(race_unadj)
OR_adj <- exp(race_adj)
tibble(
covariate = v,
race_level = str_remove(names(race_adj), "race_cat"),
Odds_ratio_unadjusted = OR_unadj,
Odds_ratio_adjusted = OR_adj,
abs_change = abs(OR_adj - OR_unadj),
percent_change = 100 * (OR_adj - OR_unadj) / OR_unadj
)
})
results_or |>
mutate(across(where(is.numeric), ~ round(.x, 3))) |>
arrange(race_level, desc(abs_change)) |>
knitr::kable(
caption = "Survey-Weighted Change in Race Odds Ratios After Adding Each Covariate"
) |>
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = FALSE
)| covariate | race_level | Odds_ratio_unadjusted | Odds_ratio_adjusted | abs_change | percent_change |
|---|---|---|---|---|---|
| income | American Indian or Alaska Native | 0.315 | 0.454 | 0.139 | 43.981 |
| education | American Indian or Alaska Native | 0.315 | 0.370 | 0.055 | 17.392 |
| priminsur | American Indian or Alaska Native | 0.315 | 0.369 | 0.053 | 16.967 |
| gen_health | American Indian or Alaska Native | 0.315 | 0.340 | 0.024 | 7.726 |
| persdoc | American Indian or Alaska Native | 0.315 | 0.339 | 0.024 | 7.577 |
| employ1_cat | American Indian or Alaska Native | 0.315 | 0.338 | 0.023 | 7.137 |
| age | American Indian or Alaska Native | 0.315 | 0.303 | 0.012 | -3.795 |
| medcost | American Indian or Alaska Native | 0.315 | 0.326 | 0.011 | 3.398 |
| checkup | American Indian or Alaska Native | 0.315 | 0.306 | 0.009 | -2.995 |
| urbstat1 | American Indian or Alaska Native | 0.315 | 0.318 | 0.003 | 0.875 |
| education | Asian | 0.324 | 0.245 | 0.079 | -24.249 |
| income | Asian | 0.324 | 0.265 | 0.059 | -18.348 |
| priminsur | Asian | 0.324 | 0.296 | 0.028 | -8.664 |
| persdoc | Asian | 0.324 | 0.346 | 0.022 | 6.710 |
| employ1_cat | Asian | 0.324 | 0.310 | 0.014 | -4.253 |
| urbstat1 | Asian | 0.324 | 0.317 | 0.007 | -2.183 |
| medcost | Asian | 0.324 | 0.317 | 0.007 | -2.176 |
| gen_health | Asian | 0.324 | 0.318 | 0.006 | -1.957 |
| checkup | Asian | 0.324 | 0.328 | 0.004 | 1.370 |
| age | Asian | 0.324 | 0.326 | 0.002 | 0.712 |
| income | Black | 0.450 | 0.573 | 0.124 | 27.487 |
| priminsur | Black | 0.450 | 0.505 | 0.056 | 12.400 |
| gen_health | Black | 0.450 | 0.475 | 0.025 | 5.531 |
| checkup | Black | 0.450 | 0.426 | 0.024 | -5.235 |
| education | Black | 0.450 | 0.469 | 0.019 | 4.199 |
| persdoc | Black | 0.450 | 0.443 | 0.007 | -1.452 |
| employ1_cat | Black | 0.450 | 0.456 | 0.006 | 1.368 |
| urbstat1 | Black | 0.450 | 0.443 | 0.006 | -1.408 |
| medcost | Black | 0.450 | 0.455 | 0.005 | 1.136 |
| age | Black | 0.450 | 0.446 | 0.004 | -0.922 |
| education | Hispanic | 0.409 | 0.634 | 0.225 | 55.045 |
| income | Hispanic | 0.409 | 0.591 | 0.182 | 44.430 |
| priminsur | Hispanic | 0.409 | 0.530 | 0.121 | 29.519 |
| persdoc | Hispanic | 0.409 | 0.472 | 0.063 | 15.410 |
| gen_health | Hispanic | 0.409 | 0.442 | 0.033 | 8.080 |
| employ1_cat | Hispanic | 0.409 | 0.428 | 0.019 | 4.567 |
| medcost | Hispanic | 0.409 | 0.425 | 0.016 | 3.918 |
| urbstat1 | Hispanic | 0.409 | 0.401 | 0.008 | -1.840 |
| age | Hispanic | 0.409 | 0.405 | 0.004 | -1.010 |
| checkup | Hispanic | 0.409 | 0.410 | 0.002 | 0.380 |
| income | Multiracial | 0.948 | 1.093 | 0.145 | 15.317 |
| priminsur | Multiracial | 0.948 | 1.038 | 0.090 | 9.498 |
| education | Multiracial | 0.948 | 1.004 | 0.057 | 5.979 |
| persdoc | Multiracial | 0.948 | 0.984 | 0.036 | 3.852 |
| checkup | Multiracial | 0.948 | 0.975 | 0.027 | 2.885 |
| employ1_cat | Multiracial | 0.948 | 0.973 | 0.025 | 2.683 |
| gen_health | Multiracial | 0.948 | 0.973 | 0.025 | 2.648 |
| medcost | Multiracial | 0.948 | 0.970 | 0.022 | 2.341 |
| age | Multiracial | 0.948 | 0.966 | 0.019 | 1.973 |
| urbstat1 | Multiracial | 0.948 | 0.941 | 0.006 | -0.683 |
| income | Native Hawaiian or other Pacific Islander | 0.587 | 0.708 | 0.121 | 20.585 |
| education | Native Hawaiian or other Pacific Islander | 0.587 | 0.686 | 0.100 | 16.996 |
| priminsur | Native Hawaiian or other Pacific Islander | 0.587 | 0.635 | 0.048 | 8.239 |
| employ1_cat | Native Hawaiian or other Pacific Islander | 0.587 | 0.627 | 0.040 | 6.869 |
| age | Native Hawaiian or other Pacific Islander | 0.587 | 0.551 | 0.036 | -6.099 |
| medcost | Native Hawaiian or other Pacific Islander | 0.587 | 0.620 | 0.034 | 5.728 |
| gen_health | Native Hawaiian or other Pacific Islander | 0.587 | 0.619 | 0.032 | 5.482 |
| persdoc | Native Hawaiian or other Pacific Islander | 0.587 | 0.603 | 0.016 | 2.754 |
| checkup | Native Hawaiian or other Pacific Islander | 0.587 | 0.572 | 0.014 | -2.444 |
| urbstat1 | Native Hawaiian or other Pacific Islander | 0.587 | 0.576 | 0.011 | -1.838 |
# Extract fitted + residuals
df_resid <- data.frame(
fitted = fitted(model),
dev_resid = residuals(model, type = "deviance"),
pearson_resid = residuals(model, type = "pearson")
)
# Deviance residuals vs fitted
ggplot(df_resid, aes(x = fitted, y = dev_resid)) +
geom_point(alpha = 0.3) +
geom_smooth(se = TRUE, color = "green") +
labs(
title = "Deviance Residuals vs Fitted (Survey-Weighted Full Model)",
x = "Fitted Values",
y = "Deviance Residuals"
) +
theme_minimal()# Create predicted and observed values from brfss_analytic and split sample into 10 group (deciles)
calib <- data.frame(
pred = fitted(model),
obs = brfss_analytic$cervicalscr_bin
) |>
mutate(decile = ntile(pred, 10)) |>
group_by(decile) |>
summarize(
mean_pred = mean(pred),
mean_obs = mean(obs)
)
# Plot Predicted vs. Proportion for Calibration
ggplot(calib, aes(x = mean_pred, y = mean_obs)) +
geom_point(size = 3) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
labs(
title = "Calibration Plot (Survey-Weighted Full Model)",
x = "Mean Predicted Probability",
y = "Observed Proportion"
) +
theme_minimal()# Hosmer-Lemeshow Test
hl_test <- ResourceSelection::hoslem.test(
x = as.numeric(brfss_analytic$cervicalscr_bin) - 1,
y = fitted(model),
g = 10
)
hl_test##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: as.numeric(brfss_analytic$cervicalscr_bin) - 1, fitted(model)
## X-squared = 772960, df = 8, p-value < 2.2e-16
# Fit unweighted glm for Cook's Distance
model_glm <- glm(
cervicalscr_bin ~ age + education + race_cat + employ1_cat +
gen_health + income + urbstat1 + priminsur + persdoc +
medcost + checkup,
data = brfss_analytic,
family = binomial()
)
# Number of observations
n <- nrow(brfss_analytic)
# Compute Cook's distance
cooks_d <- cooks.distance(model_glm)
# Identify influential points
influential_flag <- ifelse(cooks_d > 4/n,
"Potentially influential",
"Not influential")
# Count how many exceed the threshold
n_influential <- sum(cooks_d > 4/n)
# Build the augmented dataset
augment <- data.frame(
obs_num = 1:n,
cooks_d = cooks_d,
influential = influential_flag
)
# Plot Cook's Distance
cooks_distance <- ggplot(augment, aes(x = obs_num, y = cooks_d, color = influential)) +
geom_point(alpha = 0.6, size = 1.2) +
geom_hline(yintercept = 4/n, linetype = "dashed",
color = "red", linewidth = 1) +
scale_color_manual(values = c("Potentially influential" = "tomato",
"Not influential" = "steelblue")) +
labs(
title = "Cook's Distance with Unweighted GLM as Approximation",
subtitle = paste0("Dashed line = 4/n threshold | ",
n_influential, " potentially influential observations"),
x = "Observation Number",
y = "Cook's Distance",
color = ""
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")
# Print Cooks Distance Plot
print(cooks_distance)# Extract leverage for each observation from unweighted logistic regression
lev <- hatvalues(model_glm)
# Compute standard deviance residuals from unweigthted logistic regression
std_dev_resid <- rstandard(model_glm, type = "deviance")
# Create leverage vs. standardized residuals plot
ggplot(data.frame(lev, std_dev_resid),
aes(x = lev, y = std_dev_resid)) +
geom_point(alpha = 0.3) +
geom_smooth(se = TRUE, color = "red") +
labs(
title = "Leverage vs Standardized Deviance Residuals with Unweighted GLM as Approximation",
x = "Leverage",
y = "Standardized Deviance Residuals"
) +
theme_minimal()# Create prediction from ggpredict
pred <- ggpredict(model, terms = "race_cat")
# Plot primary association visualization
ggplot(pred, aes(x = x, y = predicted)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
labs(
x = "Race/Ethnicity",
y = "Adjusted Predicted Probability of Screening",
title = "Figure 1. Adjusted Predicted Cervical Cancer Screening Probability by Race/Ethnicity Category with 95% Confidence Intervals",
caption = "Asian and American Indian or Alaska Native women have the lowest predicted probability of being screened for cervical cancer. White and multiracial females have the highest predicted probability of screening"
) +
theme_minimal() +
theme(
plot.caption = element_text(hjust = 0.5, size = 10, face = "italic"),
plot.margin = margin(t = 10, r = 10, b = 60, l = 10)
)# Tidy the weighted model output
tbl_coef <- tidy(model, conf.int = TRUE, exponentiate = TRUE) |>
filter(term != "(Intercept)") |>
mutate(
term = forcats::fct_reorder(term, estimate) # reorder for nicer plotting
)
# Clean up term labels so the plot looks nice
tbl_coef_clean <- tbl_coef |>
mutate(
term_clean = case_when(
str_detect(term, "^race_cat") ~ str_replace(term, "race_cat", "Race: "),
str_detect(term, "^education") ~ str_replace(term, "education", "Education: "),
str_detect(term, "^income") ~ str_replace(term, "income", "Income: "),
str_detect(term, "^gen_health") ~ str_replace(term, "gen_health", "General health: "),
str_detect(term, "^priminsur") ~ str_replace(term, "priminsur", "Insurance: "),
str_detect(term, "^persdoc") ~ str_replace(term, "persdoc", "Personal doctor: "),
str_detect(term, "^medcost") ~ str_replace(term, "medcost", "Cost barrier: "),
str_detect(term, "^checkup") ~ str_replace(term, "checkup", "Time since checkup: "),
str_detect(term, "^employ1_cat") ~ str_replace(term, "employ1_cat", "Employment: "),
str_detect(term, "^urbstat1") ~ str_replace(term, "urbstat1", "Urbanicity: "),
str_detect(term, "^age") ~ str_replace(term, "age", "Age: "),
TRUE ~ term
)
)
# Create full plot
ggplot(tbl_coef_clean, aes(x = estimate, y = term_clean)) +
geom_point(size = 3) +
geom_errorbar(
aes(xmin = conf.low, xmax = conf.high),
width = 0.2,
orientation = "y"
) +
geom_vline(xintercept = 1, linetype = "dashed", color = "blue") +
scale_x_log10() +
labs(
title = "Figure 2. Adjusted Odds Ratios of Cervical Cancer Screening with 95% Wald Confidence Intervals for All Covariates",
x = "Odds Ratio (log scale)",
y = "",
caption = "Hispanic, Black, Asian, and American Indian or Alaska Native are significantly associated with lowered odds of cervical cancer screening. Education, income, and age demonstrate a dose response."
) +
theme_minimal() +
theme(
plot.caption = element_text(hjust = 0.5, size = 10, face = "italic"),
plot.margin = margin(t = 10, r = 10, b = 60, l = 10)
)# Put them all in order
race_order <- c(
"Race: Multiracial",
"Race: Hispanic",
"Race: Native Hawaiian or other Pacific Islander",
"Race: Black",
"Race: American Indian or Alaska Native",
"Race: Asian"
)
checkup_order <- c(
"Time since checkup: Within past 2 years",
"Time since checkup: 5 or more years ago",
"Time since checkup: Never",
"Time since checkup: Don't Know/Not Sure"
)
education_order <- c(
"Education: High school graduate",
"Education: Some college",
"Education: College graduate"
)
income_order <- c(
"Income: $15,000 to < $25,000",
"Income: $25,000 to < $35,000",
"Income: $35,000 to < $50,000",
"Income: $50,000 to < $100,000",
"Income: $100,000 to < $200,000",
"Income: More than $200,000"
)
persdoc_order <- c(
"Personal doctor: Yes only one",
"Personal doctor: More than one"
)
tbl_coef_clean <- tbl_coef |>
filter(
str_detect(term, "^(race_cat|checkup|education|income|persdoc)")
) |>
mutate(
term_clean = case_when(
str_detect(term, "^race_catMultiracial") ~ "Race: Multiracial",
str_detect(term, "^race_catHispanic") ~ "Race: Hispanic",
str_detect(term, "^race_catBlack") ~ "Race: Black",
str_detect(term, "^race_catAmerican Indian or Alaska Native") ~ "Race: American Indian or Alaska Native",
str_detect(term, "^race_catAsian") ~ "Race: Asian",
str_detect(term, "^race_catNative Hawaiian or other Pacific Islander") ~ "Race: Native Hawaiian or other Pacific Islander",
str_detect(term, "^checkup") ~ str_replace(term, "checkup", "Time since checkup: "),
str_detect(term, "^education") ~ str_replace(term, "education", "Education: "),
str_detect(term, "^income") ~ str_replace(term, "income", "Income: "),
str_detect(term, "^persdocYes only one") ~ "Personal doctor: Yes only one",
str_detect(term, "^persdocMore than one") ~ "Personal doctor: More than one",
TRUE ~ term
)
) |>
mutate(
term_clean = factor(
term_clean,
levels = c(
race_order,
checkup_order,
persdoc_order,
education_order,
income_order
)
)
)
ggplot(tbl_coef_clean, aes(x = estimate, y = term_clean)) +
geom_point(size = 3) +
geom_errorbar(
aes(xmin = conf.low, xmax = conf.high),
width = 0.2,
orientation = "y"
) +
geom_vline(xintercept = 1, linetype = "dashed", color = "blue") +
scale_x_log10() +
labs(
title = "Adjusted Odds Ratios of Specific Covariates with 95% Confidence Intervals",
x = "Odds Ratio (log scale)",
y = ""
) +
theme_minimal(base_size = 13)