library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(NHANES)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Load the NHANES data set from the NHANES package into the workspace and
# preview its first six rows.
data("NHANES", package = "NHANES")
head(NHANES, n = 6)
## # A tibble: 6 × 76
## ID SurveyYr Gender Age AgeDecade AgeMonths Race1 Race3 Education
## <int> <fct> <fct> <int> <fct> <int> <fct> <fct> <fct>
## 1 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 2 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 3 51624 2009_10 male 34 " 30-39" 409 White <NA> High School
## 4 51625 2009_10 male 4 " 0-9" 49 Other <NA> <NA>
## 5 51630 2009_10 female 49 " 40-49" 596 White <NA> Some College
## 6 51638 2009_10 male 9 " 0-9" 115 White <NA> <NA>
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## # Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## # Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## # BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## # BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## # BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>,
## # TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, UrineVol2 <int>, …
# Build the analysis table: keep only the identifier, demographic, and health
# columns used below, then add a Hypertension factor.
# A row is flagged "Yes" when the first systolic reading is at least 140 or
# the first diastolic reading is at least 90, and "No" when both readings are
# present and below those cut-offs. If a reading is missing and the other one
# does not exceed its cut-off, the flag is NA.
nhanes_analysis <- select(
  NHANES,
  ID, Gender, Age, Race1, Education, BMI, Pulse,
  BPSys1, BPDia1, PhysActive, SmokeNow, Diabetes, HealthGen
) %>%
  mutate(
    Hypertension = factor(
      if_else(BPSys1 >= 140 | BPDia1 >= 90, "Yes", "No")
    )
  )
# Per-gender summary: row count, mean BMI (ignoring missing values), and the
# percentage of rows flagged hypertensive among rows with a non-missing flag.
health_by_gender <- summarise(
  group_by(nhanes_analysis, Gender),
  N = n(),
  Mean_BMI = mean(BMI, na.rm = TRUE),
  # %in% never yields NA, so this counts exactly the "Yes" rows.
  Pct_Hypertension = sum(Hypertension %in% "Yes") /
    sum(!is.na(Hypertension)) * 100
)
health_by_gender
## # A tibble: 2 × 4
## Gender N Mean_BMI Pct_Hypertension
## <fct> <int> <dbl> <dbl>
## 1 female 5020 26.8 13.1
## 2 male 4980 26.5 14.7
# Bin Age into the adult age bands named by the labels.
# cut() builds right-closed intervals, so each break is the upper bound of a
# band: (17,20], (20,35], (35,50], (50,65], (65,Inf].
# The first break is 17 rather than 0 so that the "18-20" band really starts
# at 18; participants younger than 18 get NA instead of being mislabelled as
# "18-20". "65+" means older than 65, because age 65 itself falls in "51-65".
# The top break is Inf so no age can fall off the upper end of the scale.
nhanes_analysis <- nhanes_analysis %>%
  mutate(
    Age_Group = cut(
      Age,
      breaks = c(17, 20, 35, 50, 65, Inf),
      labels = c("18-20", "21-35", "36-50", "51-65", "65+")
    )
  )
# Per-education-level summary: row count, mean first systolic reading
# (ignoring missing values), and the percentage of rows flagged hypertensive
# among rows with a non-missing flag. Rows with missing Education form their
# own NA group.
health_by_education <- summarise(
  group_by(nhanes_analysis, Education),
  N = n(),
  Mean_SysBP = mean(BPSys1, na.rm = TRUE),
  # %in% never yields NA, so this counts exactly the "Yes" rows.
  Pct_Hypertension = sum(Hypertension %in% "Yes") /
    sum(!is.na(Hypertension)) * 100
)
health_by_education
## # A tibble: 6 × 4
## Education N Mean_SysBP Pct_Hypertension
## <fct> <int> <dbl> <dbl>
## 1 8th Grade 451 128. 28.3
## 2 9 - 11th Grade 888 124. 17.3
## 3 High School 1517 124. 18.9
## 4 Some College 2267 122. 16.7
## 5 College Grad 2098 119. 13.1
## 6 <NA> 2779 106. 0.718
# Bar chart of hypertension percentage by education level. The group with
# missing Education is dropped before plotting, and the x-axis labels are
# rotated 45 degrees so the level names do not overlap.
ggplot(
  data = drop_na(health_by_education, Education),
  mapping = aes(x = Education, y = Pct_Hypertension)
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Hypertension prevalence was highest among individuals with an 8th-grade education (28.3%) and lowest among college graduates (13.1%). The intermediate groups fell between these extremes (9th-11th grade 17.3%, high school 18.9%, some college 16.7%), so the gradient is not strictly monotonic, but overall it points to an inverse relationship between educational attainment and cardiovascular risk. This pattern likely reflects social determinants of health such as income, healthcare access, health literacy, chronic stress, and neighborhood resources that differ by educational attainment. These findings highlight the importance of targeting hypertension prevention and control efforts toward populations with lower education levels to reduce cardiovascular health disparities.