# Install NHANES only when it is not already available, so that re-running
# the script does not re-download the package on every execution (and does
# not fail offline when the package is already installed).
if (!requireNamespace("NHANES", quietly = TRUE)) {
  install.packages("NHANES")
}

library(tidyverse)    # Data manipulation (dplyr, ggplot2, etc.)
library(NHANES)       # NHANES dataset
library(knitr)        # For professional table output
library(kableExtra)   # Enhanced tables

# Load the NHANES data set into the workspace and preview its first 10 rows.
data("NHANES")
head(NHANES, 10)
## # A tibble: 10 × 76
##       ID SurveyYr Gender   Age AgeDecade AgeMonths Race1 Race3 Education   
##    <int> <fct>    <fct>  <int> <fct>         <int> <fct> <fct> <fct>       
##  1 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
##  2 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
##  3 51624 2009_10  male      34 " 30-39"        409 White <NA>  High School 
##  4 51625 2009_10  male       4 " 0-9"           49 Other <NA>  <NA>        
##  5 51630 2009_10  female    49 " 40-49"        596 White <NA>  Some College
##  6 51638 2009_10  male       9 " 0-9"          115 White <NA>  <NA>        
##  7 51646 2009_10  male       8 " 0-9"          101 White <NA>  <NA>        
##  8 51647 2009_10  female    45 " 40-49"        541 White <NA>  College Grad
##  9 51647 2009_10  female    45 " 40-49"        541 White <NA>  College Grad
## 10 51647 2009_10  female    45 " 40-49"        541 White <NA>  College Grad
## # ℹ 67 more variables: MaritalStatus <fct>, HHIncome <fct>, HHIncomeMid <int>,
## #   Poverty <dbl>, HomeRooms <int>, HomeOwn <fct>, Work <fct>, Weight <dbl>,
## #   Length <dbl>, HeadCirc <dbl>, Height <dbl>, BMI <dbl>,
## #   BMICatUnder20yrs <fct>, BMI_WHO <fct>, Pulse <int>, BPSysAve <int>,
## #   BPDiaAve <int>, BPSys1 <int>, BPDia1 <int>, BPSys2 <int>, BPDia2 <int>,
## #   BPSys3 <int>, BPDia3 <int>, Testosterone <dbl>, DirectChol <dbl>,
## #   TotChol <dbl>, UrineVol1 <int>, UrineFlow1 <dbl>, UrineVol2 <int>, …
# Build the analysis table: derive a hypertension flag from the first blood
# pressure reading, then keep only the columns used downstream.
nhanes_analysis <- NHANES %>%
  mutate(
    # "Yes" when the first systolic reading is >= 140 or the first diastolic
    # reading is >= 90, otherwise "No". The flag is NA when missing readings
    # leave the comparison undetermined.
    Hypertension = ifelse(BPSys1 >= 140 | BPDia1 >= 90, "Yes", "No")
  ) %>%
  select(
    ID,
    # Demographics
    Gender, Age, Race1, Education,
    # Body measurements and vital signs (BP columns are the 1st reading)
    BMI, Pulse, BPSys1, BPDia1,
    # Lifestyle and health status
    PhysActive, SmokeNow, Diabetes, HealthGen,
    # Derived flag, kept as the last column
    Hypertension
  )
# Summarise blood pressure by education level.
#   N                - number of rows in the group (including rows with no
#                      blood pressure reading)
#   Mean_SysBP       - mean first systolic reading, missing values ignored
#   Pct_Hypertension - share of "Yes" among rows whose Hypertension flag is
#                      not missing, as a percentage
health_by_education <- nhanes_analysis %>%
  group_by(Education) %>%
  summarise(
    N = n(),
    Mean_SysBP = round(mean(BPSys1, na.rm = TRUE), digits = 2),
    Pct_Hypertension = round(
      # %in% never yields NA, so missing flags simply do not count as "Yes".
      sum(Hypertension %in% "Yes") / sum(!is.na(Hypertension)) * 100,
      digits = 2
    ),
    .groups = "drop"
  )
print(health_by_education)
## # A tibble: 6 × 4
##   Education          N Mean_SysBP Pct_Hypertension
##   <fct>          <int>      <dbl>            <dbl>
## 1 8th Grade        451       128.            28.3 
## 2 9 - 11th Grade   888       124.            17.3 
## 3 High School     1517       124.            18.9 
## 4 Some College    2267       122.            16.6 
## 5 College Grad    2098       119.            13.1 
## 6 <NA>            2779       106.             0.72
# Bar chart of hypertension prevalence per education level. Rows with a
# missing education level are left out of the plot.
health_by_education %>%
  drop_na(Education) %>%
  ggplot(mapping = aes(x = Education, y = Pct_Hypertension)) +
  geom_col(alpha = 0.7, fill = "darkred") +
  # Print the percentage just above each bar.
  geom_text(
    mapping = aes(label = paste0(Pct_Hypertension, "%")),
    size = 3,
    vjust = -0.5
  ) +
  # Fixed 0-50 range leaves headroom for the labels above the bars.
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    caption = "Source: NHANES",
    y = "Percent with Hypertension (%)",
    x = "Education Level",
    title = "Hypertension Prevalence by Education Level"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels so the education levels do not overlap.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

## The bar chart shows a broadly inverse association between education level
## and the prevalence of hypertension: college graduates have the lowest
## prevalence (≈13%) and those with only an eighth-grade education have the
## highest (≈28%), although the trend is not strictly monotonic (High School
## is slightly above 9 - 11th Grade). This pattern probably reflects social
## determinants of health that tend to improve with increasing educational
## attainment, such as income, work stress, health literacy, access to
## preventive treatment, and community resources.