# Load required packages
library(tidyverse) # Data manipulation (dplyr, ggplot2, etc.)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(NHANES) # NHANES dataset
## Warning: package 'NHANES' was built under R version 4.5.2
library(knitr) # For professional table output
## Warning: package 'knitr' was built under R version 4.5.2
library(kableExtra) # Enhanced tables
## Warning: package 'kableExtra' was built under R version 4.5.2
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Select key variables for analysis
nhanes_analysis <- NHANES %>%
dplyr:: select(
ID,
Gender, # Sex (Male/Female)
Age, # Age in years
Race1, # Race/ethnicity
Education, # Education level
BMI, # Body Mass Index
Pulse, # Resting heart rate
BPSys1, # Systolic blood pressure (1st reading)
BPDia1, # Diastolic blood pressure (1st reading)
PhysActive, # Physically active (Yes/No)
SmokeNow, # Current smoking status
Diabetes, # Diabetes diagnosis (Yes/No)
HealthGen # General health rating
) %>%
# Create a binary hypertension indicator (BPSys1 >= 140 OR BPDia1 >= 90)
mutate(
Hypertension = ifelse(BPSys1 >= 140 | BPDia1 >= 90, "Yes", "No")
)
nhanes_analysis2 <- nhanes_analysis %>%
filter(complete.cases(.))
Using the nhanes_analysis data, explore:
“How does hypertension prevalence vary by education level?”
Write code to:
# Your code here:
health_by_education <- nhanes_analysis %>%
group_by(Education) %>%
summarise(
N = n(),
Mean_SysBP = round(mean(BPSys1, na.rm = TRUE), 2),
Pct_Hypertension = round(
sum(Hypertension == "Yes", na.rm = TRUE) / sum(!is.na(Hypertension)) * 100, 2)
)
print(health_by_education)
## # A tibble: 6 × 4
## Education N Mean_SysBP Pct_Hypertension
## <fct> <int> <dbl> <dbl>
## 1 8th Grade 451 128. 28.3
## 2 9 - 11th Grade 888 124. 17.3
## 3 High School 1517 124. 18.9
## 4 Some College 2267 122. 16.6
## 5 College Grad 2098 119. 13.1
## 6 <NA> 2779 106. 0.72
Create a bar chart showing hypertension by education level:
# Your visualization here:
health_by_education %>%
filter(!is.na(Education)) %>%
ggplot(aes(x = Education, y = Pct_Hypertension)) +
geom_col(fill = "steelblue", alpha = 0.7) +
geom_text(aes(label = paste0(Pct_Hypertension, "%")),
vjust = -0.5, size = 3) +
labs(
title = "Hypertension Prevalence by Education Level",
x = "Education Level",
y = "Percent with Hypertension (%)",
caption = "Source: NHANES"
) +
ylim(0, 50) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Write 2-3 sentences:
“What does this pattern tell us about health disparities and social determinants?” Those who only completed an education up to 8th grade have the highest prevalence of hypertension, and those who graduated college have the lowest prevalence of hypertension.There is a general trend that as education level increases, the prevalence of hypertension decreases. A decreased knowledge of proper nutrition due to lack of education, high stress from lower paying jobs due to lower education level, decreased access to healthy and fresh foods due to lower income, and less time able to participate in physical activity because of more time spent working could be a few explanations as to why there is an increase in hypertension associated with lower education levels. This is important to know for public health in order for resources to be targeted toward those with lower education levels supporting a decrease in hypertension, access to nutrition information and healthy foods, and more.