# Load required packages
library(tidyverse)
library(haven)
library(knitr)
library(kableExtra)
library(plotly)
library(broom)
library(car)
library(ggeffects)
library(gtsummary)
library(ggstats)
library(janitor)
library(broom.helpers)
library(ggplot2)The BRFSS is a large-scale telephone survey that collects data on health-related risk behaviors, chronic health conditions, and use of preventive services from U.S. residents.
# Load the full BRFSS 2024 dataset
brfss_full <- read_xpt("/Users/zoya_hayes/Desktop/EPI553/EPI553_Coding/FinalProject/BRFSS2024") %>%
janitor::clean_names()
# Display dataset dimensions
names(brfss_full)## [1] "state" "fmonth" "idate" "imonth" "iday" "iyear"
## [7] "dispcode" "seqno" "psu" "ctelenm1" "pvtresd1" "colghous"
## [13] "statere1" "celphon1" "ladult1" "numadult" "respslc1" "landsex3"
## [19] "safetime" "ctelnum1" "cellfon5" "cadult1" "cellsex3" "pvtresd3"
## [25] "cclghous" "cstate1" "landline" "hhadult" "sexvar" "genhlth"
## [31] "physhlth" "menthlth" "poorhlth" "primins2" "persdoc3" "medcost1"
## [37] "checkup1" "exerany2" "lastden4" "rmvteth4" "cvdinfr4" "cvdcrhd4"
## [43] "cvdstrk3" "asthma3" "asthnow" "chcscnc1" "chcocnc1" "chccopd3"
## [49] "addepev3" "chckdny2" "havarth4" "diabete4" "diabage4" "marital"
## [55] "educa" "renthom1" "numhhol4" "numphon4" "cpdemo1c" "veteran3"
## [61] "employ1" "children" "income3" "pregnant" "weight2" "height3"
## [67] "deaf" "blind" "decide" "diffwalk" "diffdres" "diffalon"
## [73] "hadmam" "howlong" "cervscrn" "crvclcnc" "crvclpap" "crvclhpv"
## [79] "hadhyst2" "hadsigm4" "colnsigm" "colntes1" "sigmtes1" "lastsig4"
## [85] "colncncr" "vircolo1" "vclntes2" "smalstol" "stoltest" "stooldn2"
## [91] "bldstfit" "sdnates1" "smoke100" "smokday2" "usenow3" "ecignow3"
## [97] "lcsfirst" "lcsnumcg" "lcsctsc1" "lcsscncr" "lcsctwhn" "alcday4"
## [103] "avedrnk4" "drnk3ge5" "maxdrnks" "flushot7" "flshtmy3" "imfvpla5"
## [109] "pneuvac4" "hivtst7" "hivtstd3" "hivrisk5" "pdiabts1" "prediab2"
## [115] "diabtype" "insulin1" "chkhemo3" "eyeexam1" "diabeye1" "diabedu1"
## [121] "feetsore" "arthexer" "shingle2" "hpvadvc4" "hpvadsh1" "tetanus1"
## [127] "cncrdiff" "cncrage" "cncrtyp2" "csrvtrt3" "csrvdoc1" "csrvsum"
## [133] "csrvrtrn" "csrvinst" "csrvinsr" "csrvdein" "csrvclin" "csrvpain"
## [139] "csrvctl2" "psatest1" "psatime1" "pcpsars2" "psasugs1" "pcstalk2"
## [145] "cimemlo1" "cdworry" "cddiscu1" "cdhous1" "cdsocia1" "caregiv1"
## [151] "crgvrel5" "crgvprb4" "crgvalzd" "crgvnurs" "crgvper2" "crgvhou2"
## [157] "crgvhrs2" "crgvlng2" "acedeprs" "acedrink" "acedrugs" "aceprisn"
## [163] "acedivrc" "acepunch" "acehurt1" "aceswear" "acetouch" "acetthem"
## [169] "acehvsex" "aceadsaf" "aceadned" "lsatisfy" "emtsuprt" "sdlonely"
## [175] "sdhemply" "foodstmp" "sdhfood1" "sdhbills" "sdhutils" "sdhtrnsp"
## [181] "howsafe1" "marijan1" "marjsmok" "marjeat" "marjvape" "marjdab"
## [187] "marjothr" "usemrjn4" "lastsmk2" "stopsmk2" "mentcigs" "mentecig"
## [193] "heattbco" "ssbsugr2" "ssbfrut3" "firearm5" "gunload" "loadulk2"
## [199] "rcsgend1" "rcsxbrth" "rcsrltn2" "casthdx2" "casthno2" "somale"
## [205] "sofemale" "hadsex" "pfpprvn4" "typcntr9" "nobcuse8" "qstver"
## [211] "qstlang" "hpvdsht" "icfqstvr" "metstat" "urbstat" "mscode"
## [217] "ststr" "strwt" "rawrake" "wt2rake" "imprace" "chispnc"
## [223] "crace1" "cageg" "cllcpwt" "dualuse" "dualcor" "llcpwt2"
## [229] "llcpwt" "rfhlth" "phys14d" "ment14d" "hlthpl2" "hcvu654"
## [235] "totinda" "exteth3" "alteth3" "denvst3" "michd" "ltasth1"
## [241] "casthm1" "asthms1" "drdxar2" "mrace1" "hispanc" "race"
## [247] "raceg21" "racegr3" "raceprv" "sex" "ageg5yr" "age65yr"
## [253] "age80" "age_g" "htin4" "htm4" "wtkg3" "bmi5"
## [259] "bmi5cat" "rfbmi5" "chldcnt" "educag" "incomg1" "rfmam23"
## [265] "mam402y" "crvscrn" "rfpap37" "hpv5yr1" "paphpv1" "hadcoln"
## [271] "clnscp2" "hadsigm" "sgmscp2" "sgms102" "rfblds6" "stoldn2"
## [277] "vircol2" "sbonti2" "crcrec3" "smoker3" "rfsmok3" "cureci3"
## [283] "lcslast" "lcsnumc" "lcsage" "lcsysmk" "packday" "packyrs"
## [289] "lcsyqts" "lcssmkg" "lcselig" "lcsctsn" "lcspstf" "drnkany6"
## [295] "drocdy4" "rfbing6" "drnkwk3" "rfdrhv9" "flshot7" "pneumo3"
## [301] "aidtst4"
# Select variables of interest and create analytic dataset
brfss_subset <- brfss_full %>%
select(
# Outcome: Have you ever had a cervical cancer screening test
cervscrn,
# Demographics
ageg5yr, # Age category, fourteen-level age category
sexvar, # Sex
race, # Race/ethnicity
educag, # Education level
employ1, # Employment status
# Health status
genhlth, # General health
incomg1, # Income category
urbstat, # Urban/Rural Status
primins2, # Primary source of health care coverage
persdoc3, # Personal health care provider
medcost1, # Delayed care due to cost
checkup1, # Time since last routine checkup
)
# Filter for only females
brfss_female <- brfss_subset %>%
filter(
sexvar == 2) # 2 = Female in BRFSS
# Filter for age (25-64)
brfss_filtered <- brfss_female %>%
filter(
ageg5yr %in% 2:9) # 1–9 correspond to ages 25–64
# Recode variables
brfss_recode <- brfss_filtered %>%
mutate(
cervicalscr = factor(case_when(
cervscrn == 1 ~ "Screened",
cervscrn == 2 ~ "Not Screened"),
levels =
c("Screened", "Not Screened")),
age = factor(case_when(
ageg5yr == 2 ~ "25-29",
ageg5yr == 3 ~ "30-34",
ageg5yr == 4 ~ "35-39",
ageg5yr == 5 ~ "40-44",
ageg5yr == 6 ~ "45-49",
ageg5yr == 7 ~ "50-54",
ageg5yr == 8 ~ "55-59",
ageg5yr == 9 ~ "60-64"),
levels = c("25-29","30-34","35-39","40-44", "45-49","50-54","55-59","60-64")),
sex = factor("Female",
levels = "Female"),
education = factor(case_when(
educag == 1 ~ "< High school",
educag == 2 ~ "High school graduate",
educag == 3 ~ "Some college",
educag == 4 ~ "College graduate"),
levels = c("< High school", "High school graduate",
"Some college", "College graduate")),
race_cat = factor(case_when(
race == 1 ~ "White",
race == 2 ~ "Black",
race == 3 ~ "American Indian or Alaska Native",
race == 4 ~ "Asian",
race == 5 ~ "Native Hawaiian or other Pacific Islander",
race == 6 ~ "Other race",
race == 7 ~ "Multiracial",
race == 8 ~ "Hispanic"),
levels = c("White",
"Black",
"American Indian or Alaska Native",
"Asian",
"Native Hawaiian or other Pacific Islander",
"Other race",
"Multiracial",
"Hispanic")),
employ1_cat = factor(case_when(
employ1 == 1 ~ "Employed for wages",
employ1 == 2 ~ "Self-employed",
employ1 == 3 ~ "Out of work for 1 year or more",
employ1 == 4 ~ "Out of work for less than 1 year",
employ1 == 5 ~ "A homemaker",
employ1 == 6 ~ "A student",
employ1 == 7 ~ "Retired",
employ1 == 8 ~ "Unable to work"),
levels = c("Employed for wages", "Self-employed",
"Out of work for 1 year or more", "Out of work for less than 1 year",
"A homemaker", "A student", "Retired", "Unable to work")),
gen_health = factor(case_when(
genhlth == 1 ~ "Excellent",
genhlth == 2 ~ "Very Good",
genhlth == 3 ~ "Good",
genhlth == 4 ~ "Fair",
genhlth == 5 ~ "Poor"),
levels = c("Excellent", "Very Good", "Good", "Fair", "Poor")),
income = factor(case_when(
incomg1 == 1 ~ "< $15,000",
incomg1 == 2 ~ "$15,000 to < $25,000",
incomg1 == 3 ~ "$25,000 to < $35,000",
incomg1 == 4 ~ "$35,000 to < $50,000",
incomg1 == 5 ~ "$50,000 to < $100,000",
incomg1 == 6 ~ "$100,000 to < $200,000",
incomg1 == 7 ~ "> $200,000"),
levels = c("< $15,000", "$15,000 to < $25,000", "$25,000 to < $35,000", "$35,000 to < $50,000", "$50,000 to < $100,000", "$100,000 to < $200,000", "> $200,000")),
urbstat1 = factor(case_when(
urbstat == 1 ~ "Urban County",
urbstat == 2 ~ "Rural County"),
levels = c("Urban County", "Rural County")),
priminsur = factor(case_when(
primins2 == 1 ~ "Employer/Union Insurance",
primins2 == 2 ~ "Private Nongovernmental Insurance",
primins2 == 3 ~ "Medicare",
primins2 == 4 ~ "Medigap",
primins2 == 5 ~ "Medicaid",
primins2 == 6 ~ "Children's Health Insurance Program",
primins2 == 7 ~ "Military Related Health Care",
primins2 == 8 ~ "Indian Health Service",
primins2 == 9 ~ "State Sponsored Health Plan",
primins2 == 10 ~ "Other Governmental Program",
primins2 == 88 ~ "No Coverage of Any Type"),
levels = c("Employer/Union Insurance", "Private Nongovernmental Insurance", "Medicare", "Medigap", "Medicaid", "Children's Health Insurance Program", "Military Related Health Care", "Indian Health Service", "State Sponsored Health Plan", "Other Governmental Program", "No Coverage of Any Type")),
persdoc = factor(case_when(
persdoc3 == 1 ~ "Yes, only one",
persdoc3 == 2 ~ "More than one",
persdoc3 == 3 ~ "No"),
levels = c("Yes, only one", "More than one", "No")),
medcost = factor(case_when(
medcost1 == 1 ~ "Yes",
medcost1 == 2 ~ "No"),
levels = c("Yes", "No")),
checkup = factor(case_when(
checkup1 == 1 ~ "Within past year",
checkup1 == 2 ~ "Within past 2 years",
checkup1 == 3 ~ "Within past 5 years",
checkup1 == 4 ~ "5 or more years ago",
checkup1 == 7 ~ "Don't Know/Not Sure",
checkup1 == 8 ~ "Never"),
levels = c("Within past year", "Within past 2 years", "Within Past 5 years", "5 or more years ago","Don't Know/Not Sure", "Never"))
)
# Select only the recoded variables
brfss_recode2 <- brfss_recode %>%
select(cervicalscr, age, sex, education, race_cat, employ1_cat, gen_health, income, urbstat1, priminsur, persdoc, medcost, checkup)
# Count missing values in each column
missing_summary <- data.frame(
Variable = names(brfss_recode2),
Missing_Count = colSums(is.na(brfss_recode2)),
Missing_Percent = round(colSums(is.na(brfss_recode2)) / nrow(brfss_recode2) * 100, 2)
)
# Show variables with the most missing data
print(missing_summary[order(-missing_summary$Missing_Count), ][1:13, ]) %>%
kable(
caption = "Variables by Missingness"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center"
)## Variable Missing_Count Missing_Percent
## income income 19076 14.88
## cervicalscr cervicalscr 10405 8.11
## checkup checkup 6352 4.95
## urbstat1 urbstat1 5044 3.93
## priminsur priminsur 3494 2.72
## employ1_cat employ1_cat 2230 1.74
## race_cat race_cat 1722 1.34
## persdoc persdoc 924 0.72
## education education 422 0.33
## medcost medcost 400 0.31
## gen_health gen_health 259 0.20
## age age 0 0.00
## sex sex 0 0.00
| Variable | Missing_Count | Missing_Percent | |
|---|---|---|---|
| income | income | 19076 | 14.88 |
| cervicalscr | cervicalscr | 10405 | 8.11 |
| checkup | checkup | 6352 | 4.95 |
| urbstat1 | urbstat1 | 5044 | 3.93 |
| priminsur | priminsur | 3494 | 2.72 |
| employ1_cat | employ1_cat | 2230 | 1.74 |
| race_cat | race_cat | 1722 | 1.34 |
| persdoc | persdoc | 924 | 0.72 |
| education | education | 422 | 0.33 |
| medcost | medcost | 400 | 0.31 |
| gen_health | gen_health | 259 | 0.20 |
| age | age | 0 | 0.00 |
| sex | sex | 0 | 0.00 |
brfss_recode3 <- brfss_recode2 %>%
# Filter to complete cases only
drop_na()
set.seed(553) # For reproducibility
# Sample 50000 observations for manageable analysis
brfss_analytic <- brfss_recode3 %>%
slice_sample(n = 50000)
# Display subset dimensions
cat("Working subset dimensions:",
nrow(brfss_analytic), "observations,",
ncol(brfss_analytic), "variables\n")## Working subset dimensions: 50000 observations, 13 variables
# Save for later use
saveRDS(brfss_analytic,
"/Users/zoya_hayes/Desktop/EPI553/EPI553_Coding/FinalProject/brfss_final_project_2024.rds")
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_analytic), ncol(brfss_analytic))) %>%
kable(caption = "Analytic Dataset Dimensions") %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 50000 |
| Variables | 13 |
# Create Table 1 Descriptive Statistics
brfss_analytic %>%
select(cervicalscr, race_cat, age, education, employ1_cat, gen_health, income, urbstat1, priminsur, persdoc, medcost, checkup) %>%
tbl_summary(
by = race_cat, # Stratify Table by Race/Ethnicity
label = list(
cervicalscr ~ "Ever Had a Cervical Cancer Screening Test",
age ~ "Age Category",
education ~ "Level of Education Completed",
employ1_cat ~ "Current Employment Status",
gen_health ~ "General Health Status",
income ~ "Income Category",
urbstat1 ~ "Urban/Rural Status",
priminsur ~ "Current Health Care Coverage",
persdoc ~ "Personal Health Care Provider",
medcost ~ "Could not afford to see Doctor (past 12 months)",
checkup ~ "Time since last routine checkup"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) %>%
add_n() %>%
bold_labels() %>%
modify_caption("**Table 1. Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 50,000)**") %>%
modify_footnote(
all_stat_cols() ~
"Analytic sample includes 50,000 complete-case observations. Missing values were excluded listwise for all variables included in this table."
)| Characteristic | N | White N = 35,4961 |
Black N = 4,8791 |
American Indian or Alaska Native N = 8701 |
Asian N = 1,2481 |
Native Hawaiian or other Pacific Islander N = 1781 |
Other race N = 2721 |
Multiracial N = 1,2721 |
Hispanic N = 5,7851 |
|---|---|---|---|---|---|---|---|---|---|
| Ever Had a Cervical Cancer Screening Test | 50,000 | ||||||||
| Â Â Â Â Screened | 32,904 (93%) | 4,070 (83%) | 735 (84%) | 924 (74%) | 138 (78%) | 241 (89%) | 1,161 (91%) | 4,732 (82%) | |
| Â Â Â Â Not Screened | 2,592 (7.3%) | 809 (17%) | 135 (16%) | 324 (26%) | 40 (22%) | 31 (11%) | 111 (8.7%) | 1,053 (18%) | |
| Age Category | 50,000 | ||||||||
| Â Â Â Â 25-29 | 2,341 (6.6%) | 386 (7.9%) | 54 (6.2%) | 204 (16%) | 16 (9.0%) | 22 (8.1%) | 169 (13%) | 808 (14%) | |
| Â Â Â Â 30-34 | 2,980 (8.4%) | 539 (11%) | 80 (9.2%) | 227 (18%) | 23 (13%) | 20 (7.4%) | 189 (15%) | 927 (16%) | |
| Â Â Â Â 35-39 | 3,762 (11%) | 560 (11%) | 102 (12%) | 184 (15%) | 18 (10%) | 25 (9.2%) | 176 (14%) | 852 (15%) | |
| Â Â Â Â 40-44 | 4,254 (12%) | 680 (14%) | 110 (13%) | 160 (13%) | 36 (20%) | 45 (17%) | 172 (14%) | 812 (14%) | |
| Â Â Â Â 45-49 | 4,272 (12%) | 604 (12%) | 118 (14%) | 148 (12%) | 18 (10%) | 30 (11%) | 153 (12%) | 762 (13%) | |
| Â Â Â Â 50-54 | 4,913 (14%) | 705 (14%) | 110 (13%) | 138 (11%) | 18 (10%) | 44 (16%) | 133 (10%) | 655 (11%) | |
| Â Â Â Â 55-59 | 5,692 (16%) | 685 (14%) | 140 (16%) | 89 (7.1%) | 25 (14%) | 42 (15%) | 143 (11%) | 516 (8.9%) | |
| Â Â Â Â 60-64 | 7,282 (21%) | 720 (15%) | 156 (18%) | 98 (7.9%) | 24 (13%) | 44 (16%) | 137 (11%) | 453 (7.8%) | |
| Level of Education Completed | 50,000 | ||||||||
| Â Â Â Â < High school | 855 (2.4%) | 177 (3.6%) | 65 (7.5%) | 11 (0.9%) | 7 (3.9%) | 8 (2.9%) | 42 (3.3%) | 1,195 (21%) | |
| Â Â Â Â High school graduate | 6,248 (18%) | 1,096 (22%) | 221 (25%) | 142 (11%) | 62 (35%) | 60 (22%) | 217 (17%) | 1,398 (24%) | |
| Â Â Â Â Some college | 9,355 (26%) | 1,478 (30%) | 315 (36%) | 166 (13%) | 58 (33%) | 72 (26%) | 354 (28%) | 1,321 (23%) | |
| Â Â Â Â College graduate | 19,038 (54%) | 2,128 (44%) | 269 (31%) | 929 (74%) | 51 (29%) | 132 (49%) | 659 (52%) | 1,871 (32%) | |
| Current Employment Status | 50,000 | ||||||||
| Â Â Â Â Employed for wages | 22,392 (63%) | 3,106 (64%) | 480 (55%) | 851 (68%) | 102 (57%) | 147 (54%) | 756 (59%) | 3,130 (54%) | |
| Â Â Â Â Self-employed | 3,017 (8.5%) | 322 (6.6%) | 70 (8.0%) | 102 (8.2%) | 17 (9.6%) | 23 (8.5%) | 116 (9.1%) | 536 (9.3%) | |
| Â Â Â Â Out of work for 1 year or more | 687 (1.9%) | 171 (3.5%) | 47 (5.4%) | 30 (2.4%) | 8 (4.5%) | 13 (4.8%) | 40 (3.1%) | 239 (4.1%) | |
| Â Â Â Â Out of work for less than 1 year | 749 (2.1%) | 221 (4.5%) | 46 (5.3%) | 38 (3.0%) | 8 (4.5%) | 10 (3.7%) | 61 (4.8%) | 277 (4.8%) | |
| Â Â Â Â A homemaker | 2,720 (7.7%) | 145 (3.0%) | 57 (6.6%) | 103 (8.3%) | 15 (8.4%) | 26 (9.6%) | 82 (6.4%) | 956 (17%) | |
| Â Â Â Â A student | 336 (0.9%) | 117 (2.4%) | 17 (2.0%) | 55 (4.4%) | 2 (1.1%) | 3 (1.1%) | 37 (2.9%) | 79 (1.4%) | |
| Â Â Â Â Retired | 2,581 (7.3%) | 245 (5.0%) | 40 (4.6%) | 34 (2.7%) | 12 (6.7%) | 11 (4.0%) | 45 (3.5%) | 127 (2.2%) | |
| Â Â Â Â Unable to work | 3,014 (8.5%) | 552 (11%) | 113 (13%) | 35 (2.8%) | 14 (7.9%) | 39 (14%) | 135 (11%) | 441 (7.6%) | |
| General Health Status | 50,000 | ||||||||
| Â Â Â Â Excellent | 5,372 (15%) | 584 (12%) | 90 (10%) | 229 (18%) | 37 (21%) | 47 (17%) | 156 (12%) | 740 (13%) | |
| Â Â Â Â Very Good | 13,208 (37%) | 1,335 (27%) | 194 (22%) | 429 (34%) | 33 (19%) | 85 (31%) | 382 (30%) | 1,361 (24%) | |
| Â Â Â Â Good | 10,979 (31%) | 1,882 (39%) | 342 (39%) | 459 (37%) | 60 (34%) | 68 (25%) | 442 (35%) | 2,257 (39%) | |
| Â Â Â Â Fair | 4,369 (12%) | 864 (18%) | 171 (20%) | 105 (8.4%) | 36 (20%) | 42 (15%) | 213 (17%) | 1,163 (20%) | |
| Â Â Â Â Poor | 1,568 (4.4%) | 214 (4.4%) | 73 (8.4%) | 26 (2.1%) | 12 (6.7%) | 30 (11%) | 79 (6.2%) | 264 (4.6%) | |
| Income Category | 50,000 | ||||||||
| Â Â Â Â < $15,000 | 1,634 (4.6%) | 462 (9.5%) | 111 (13%) | 49 (3.9%) | 15 (8.4%) | 28 (10%) | 102 (8.0%) | 716 (12%) | |
| Â Â Â Â $15,000 to < $25,000 | 2,201 (6.2%) | 506 (10%) | 103 (12%) | 62 (5.0%) | 20 (11%) | 31 (11%) | 106 (8.3%) | 934 (16%) | |
| Â Â Â Â $25,000 to < $35,000 | 2,575 (7.3%) | 615 (13%) | 144 (17%) | 91 (7.3%) | 22 (12%) | 28 (10%) | 123 (9.7%) | 993 (17%) | |
| Â Â Â Â $35,000 to < $50,000 | 3,453 (9.7%) | 798 (16%) | 153 (18%) | 125 (10%) | 28 (16%) | 24 (8.8%) | 159 (13%) | 845 (15%) | |
| Â Â Â Â $50,000 to < $100,000 | 10,922 (31%) | 1,388 (28%) | 226 (26%) | 323 (26%) | 44 (25%) | 74 (27%) | 358 (28%) | 1,234 (21%) | |
| Â Â Â Â $100,000 to < $200,000 | 10,822 (30%) | 864 (18%) | 104 (12%) | 346 (28%) | 37 (21%) | 63 (23%) | 304 (24%) | 799 (14%) | |
| Â Â Â Â > $200,000 | 3,889 (11%) | 246 (5.0%) | 29 (3.3%) | 252 (20%) | 12 (6.7%) | 24 (8.8%) | 120 (9.4%) | 264 (4.6%) | |
| Urban/Rural Status | 50,000 | ||||||||
| Â Â Â Â Urban County | 30,102 (85%) | 4,630 (95%) | 543 (62%) | 1,223 (98%) | 173 (97%) | 249 (92%) | 1,136 (89%) | 5,494 (95%) | |
| Â Â Â Â Rural County | 5,394 (15%) | 249 (5.1%) | 327 (38%) | 25 (2.0%) | 5 (2.8%) | 23 (8.5%) | 136 (11%) | 291 (5.0%) | |
| Current Health Care Coverage | 50,000 | ||||||||
| Â Â Â Â Employer/Union Insurance | 21,322 (60%) | 2,294 (47%) | 264 (30%) | 821 (66%) | 83 (47%) | 124 (46%) | 655 (51%) | 2,073 (36%) | |
| Â Â Â Â Private Nongovernmental Insurance | 4,223 (12%) | 415 (8.5%) | 49 (5.6%) | 135 (11%) | 11 (6.2%) | 33 (12%) | 105 (8.3%) | 472 (8.2%) | |
| Â Â Â Â Medicare | 2,109 (5.9%) | 437 (9.0%) | 63 (7.2%) | 40 (3.2%) | 20 (11%) | 27 (9.9%) | 83 (6.5%) | 358 (6.2%) | |
| Â Â Â Â Medigap | 16 (<0.1%) | 0 (0%) | 3 (0.3%) | 0 (0%) | 2 (1.1%) | 0 (0%) | 0 (0%) | 4 (<0.1%) | |
| Â Â Â Â Medicaid | 3,360 (9.5%) | 976 (20%) | 179 (21%) | 106 (8.5%) | 24 (13%) | 45 (17%) | 183 (14%) | 954 (16%) | |
| Â Â Â Â Children's Health Insurance Program | 12 (<0.1%) | 3 (<0.1%) | 1 (0.1%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 6 (0.1%) | |
| Â Â Â Â Military Related Health Care | 905 (2.5%) | 137 (2.8%) | 20 (2.3%) | 20 (1.6%) | 6 (3.4%) | 6 (2.2%) | 44 (3.5%) | 98 (1.7%) | |
| Â Â Â Â Indian Health Service | 11 (<0.1%) | 1 (<0.1%) | 177 (20%) | 2 (0.2%) | 0 (0%) | 4 (1.5%) | 28 (2.2%) | 11 (0.2%) | |
| Â Â Â Â State Sponsored Health Plan | 1,236 (3.5%) | 190 (3.9%) | 31 (3.6%) | 42 (3.4%) | 15 (8.4%) | 11 (4.0%) | 67 (5.3%) | 396 (6.8%) | |
| Â Â Â Â Other Governmental Program | 933 (2.6%) | 179 (3.7%) | 24 (2.8%) | 42 (3.4%) | 11 (6.2%) | 6 (2.2%) | 39 (3.1%) | 220 (3.8%) | |
| Â Â Â Â No Coverage of Any Type | 1,369 (3.9%) | 247 (5.1%) | 59 (6.8%) | 40 (3.2%) | 6 (3.4%) | 16 (5.9%) | 68 (5.3%) | 1,193 (21%) | |
| Personal Health Care Provider | 50,000 | ||||||||
| Â Â Â Â Yes, only one | 18,810 (53%) | 2,425 (50%) | 454 (52%) | 669 (54%) | 103 (58%) | 123 (45%) | 610 (48%) | 2,842 (49%) | |
| Â Â Â Â More than one | 14,355 (40%) | 2,060 (42%) | 304 (35%) | 420 (34%) | 58 (33%) | 116 (43%) | 543 (43%) | 1,617 (28%) | |
| Â Â Â Â No | 2,331 (6.6%) | 394 (8.1%) | 112 (13%) | 159 (13%) | 17 (9.6%) | 33 (12%) | 119 (9.4%) | 1,326 (23%) | |
| Could not afford to see Doctor (past 12 months) | 50,000 | 4,022 (11%) | 744 (15%) | 147 (17%) | 102 (8.2%) | 33 (19%) | 54 (20%) | 230 (18%) | 1,362 (24%) |
| Time since last routine checkup | 50,000 | ||||||||
| Â Â Â Â Within past year | 30,254 (85%) | 4,400 (90%) | 733 (84%) | 1,038 (83%) | 141 (79%) | 225 (83%) | 1,039 (82%) | 4,667 (81%) | |
| Â Â Â Â Within past 2 years | 3,695 (10%) | 402 (8.2%) | 93 (11%) | 166 (13%) | 25 (14%) | 38 (14%) | 165 (13%) | 808 (14%) | |
| Â Â Â Â Within Past 5 years | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | |
| Â Â Â Â 5 or more years ago | 1,289 (3.6%) | 48 (1.0%) | 34 (3.9%) | 25 (2.0%) | 8 (4.5%) | 8 (2.9%) | 62 (4.9%) | 176 (3.0%) | |
| Â Â Â Â Don't Know/Not Sure | 196 (0.6%) | 25 (0.5%) | 6 (0.7%) | 11 (0.9%) | 2 (1.1%) | 0 (0%) | 5 (0.4%) | 44 (0.8%) | |
| Â Â Â Â Never | 62 (0.2%) | 4 (<0.1%) | 4 (0.5%) | 8 (0.6%) | 2 (1.1%) | 1 (0.4%) | 1 (<0.1%) | 90 (1.6%) | |
| 1 Analytic sample includes 50,000 complete-case observations. Missing values were excluded listwise for all variables included in this table. | |||||||||
# Create a bar plot for cervical cancer screening count
ggplot(brfss_analytic, aes(x = cervicalscr)) +
geom_bar() +
geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
labs(title = "Figure 1: Bar Graph of Cervical Cancer Screening with Counts",
x = "Cervical Cancer Screening",
y = "Count") +
theme_minimal()# Create bar plot demonstrating the proportion of cervical cancer screening by race/ethnicity
brfss_analytic %>%
group_by(race_cat) %>%
summarize(prop_screened = mean(cervicalscr == "Screened")) %>%
ggplot(aes(x = race_cat, y = prop_screened)) +
geom_col(fill = "steelblue") +
labs(
x = "Race/Ethnicity",
y = "Proportion Screened",
title = "Figure 2: Proportion of Cervical Cancer Screening by Race/Ethnicity"
) +
theme_minimal()# Create split bar plot for race/ethnicity
ggplot(brfss_analytic, aes(x = race_cat, fill = cervicalscr)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Race/Ethnicity",
y = "Percent",
fill = "Screened",
title = "Figure 3: Cervical Screening Distribution by Race/Ethnicity"
) +
theme_minimal()# Create proportions for each race/ethnicity category
tbl <- brfss_analytic %>%
group_by(race_cat) %>%
summarize(
prop = mean(cervicalscr == "Screened"),
n = n()
)
# Create Screening Proportion by Race and Ethnicity with Confidence Intervals
ggplot(tbl, aes(x = race_cat, y = prop)) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = prop - 0.03, ymax = prop + 0.03), width = 0.2) +
labs(
x = "Race/Ethnicity",
y = "Proportion Screened",
title = "Figure 4: Screening Proportions with Approximate CIs"
) +
theme_minimal()# Create split bar plot for education levels
ggplot(brfss_analytic, aes(x = education, fill = cervicalscr)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Level of Education",
y = "Percent",
fill = "Screened",
title = "Figure 5: Cervical Screening Distribution by Level of Education"
) +
theme_minimal()# Create split bar plot for employment status
ggplot(brfss_analytic, aes(x = employ1_cat, fill = cervicalscr)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Employment Status",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Employment Status"
) +
theme_minimal()# Create split bar plot for personal doctor status
ggplot(brfss_analytic, aes(x = persdoc, fill = cervicalscr)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Personal Doctor Status",
y = "Percent",
fill = "Screened",
title = "Cervical Screening Distribution by Personal Doctor Status"
) +
theme_minimal()# Create split bar plot for age categories
ggplot(brfss_analytic, aes(x = age, fill = cervicalscr)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Age Category (Years)",
y = "Percent",
fill = "Screened",
title = "Figure 6: Cervical Screening Distribution by Age Category"
) +
theme_minimal()