library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(NHANES)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Load the NHANES data set from the NHANES package into the workspace and
# preview its first six rows.
data("NHANES", package = "NHANES")
head(NHANES, n = 6L)
## # A tibble: 6 × 76
##      ID SurveyYr Gender   Age AgeDecade AgeMonths Race1 Race3 Education   
##   <int> <fct>    <fct>  <int> <fct>         <int> <fct> <fct> <fct>       
## 1 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 2 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 3 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
## 4 51625 2009_10  male       4 " 0-9"           49 Other <NA>  <NA>        
## 5 51630 2009_10  female    49 " 40-49"        596 White <NA>  Some College
## 6 51638 2009_10  male       9 " 0-9"          115 White <NA>  <NA>        
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## #   Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## #   Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## #   BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## #   BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## #   BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>,
## #   TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, UrineVol2 <int>, …
# Build the analysis table: keep a subset of NHANES columns and derive a
# Yes/No Hypertension factor from the first blood-pressure reading
# (BPSys1 >= 140 or BPDia1 >= 90).
# The flag is NA when neither threshold is met and at least one of the two
# readings is missing.
nhanes_analysis <- mutate(
  select(
    NHANES,
    ID, Gender, Age, Race1, Education, BMI, Pulse,
    BPSys1, BPDia1, PhysActive, SmokeNow, Diabetes, HealthGen
  ),
  Hypertension = factor(
    if_else(BPSys1 >= 140 | BPDia1 >= 90, "Yes", "No")
  )
)
# Per-gender summary: row count, mean BMI (missing values ignored), and the
# percentage of non-missing Hypertension values that equal "Yes".
health_by_gender <- summarise(
  group_by(nhanes_analysis, Gender),
  N = n(),
  Mean_BMI = mean(BMI, na.rm = TRUE),
  # %in% never yields NA, so missing flags are simply not counted as "Yes";
  # the denominator counts only rows where the flag is known.
  Pct_Hypertension = sum(Hypertension %in% "Yes") /
    sum(!is.na(Hypertension)) * 100
)

health_by_gender
## # A tibble: 2 × 4
##   Gender     N Mean_BMI Pct_Hypertension
##   <fct>  <int>    <dbl>            <dbl>
## 1 female  5020     26.8             13.1
## 2 male    4980     26.5             14.7
# Bin Age into labelled groups.
# cut() builds right-closed intervals, so the breaks below produce
# [0, 20], (20, 35], (35, 50], (50, 65], (65, 100].
# include.lowest = TRUE keeps age 0 in the first bin instead of turning it
# into NA. The labels state the integer ages each bin actually contains
# (the first bin starts at 0, not 18, and the last bin starts at 66 because
# 65 belongs to "51-65").
nhanes_analysis <- nhanes_analysis %>%
  mutate(
    Age_Group = cut(Age,
      breaks = c(0, 20, 35, 50, 65, 100),
      labels = c("0-20", "21-35", "36-50", "51-65", "66+"),
      include.lowest = TRUE
    )
  )
# Per-education-level summary: row count, mean first systolic reading
# (missing values ignored), and the percentage of non-missing Hypertension
# values that equal "Yes". Rows with a missing Education value form their
# own NA group.
health_by_education <- summarise(
  group_by(nhanes_analysis, Education),
  N = n(),
  Mean_SysBP = mean(BPSys1, na.rm = TRUE),
  # %in% never yields NA, so missing flags are simply not counted as "Yes";
  # the denominator counts only rows where the flag is known.
  Pct_Hypertension = sum(Hypertension %in% "Yes") /
    sum(!is.na(Hypertension)) * 100
)

health_by_education
## # A tibble: 6 × 4
##   Education          N Mean_SysBP Pct_Hypertension
##   <fct>          <int>      <dbl>            <dbl>
## 1 8th Grade        451       128.           28.3  
## 2 9 - 11th Grade   888       124.           17.3  
## 3 High School     1517       124.           18.9  
## 4 Some College    2267       122.           16.7  
## 5 College Grad    2098       119.           13.1  
## 6 <NA>            2779       106.            0.718
# Bar chart of hypertension percentage by education level; the row with a
# missing Education value is dropped before plotting, and x-axis labels are
# rotated 45 degrees.
ggplot(
  data = drop_na(health_by_education, Education),
  mapping = aes(x = Education, y = Pct_Hypertension)
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Hypertension prevalence was highest among individuals with an 8th-grade education or less (28.3%) and lowest among college graduates (13.1%), indicating an overall inverse relationship between education and hypertension, a major cardiovascular risk factor; the intermediate groups (16.7–18.9%) are close together and not strictly ordered. Because these estimates are not adjusted for age, part of the gradient may reflect age differences between education groups. The pattern is also consistent with social determinants of health such as income, healthcare access, health literacy, chronic stress, and neighborhood resources that differ by educational attainment. These findings highlight the importance of targeting hypertension prevention and control efforts toward populations with lower education levels to reduce cardiovascular health disparities.