# Load the NHANES survey data set
data(NHANES)
# Build the analysis sample: keep only the variables used below, restrict to
# ages 18 through 80, and drop every row with a missing value
nhanes_adult <- NHANES %>%
  select(
    Age, Weight, Height, BMI, BPSysAve, BPDiaAve,
    Pulse, PhysActive, SleepHrsNight
  ) %>%
  filter(Age >= 18 & Age <= 80) %>%
  na.omit()
# Display sample
# Report how many adults remain in the analysis sample
kable(
  data.frame(
    Metric = "Sample Size",
    Value = paste(nrow(nhanes_adult), "adults")
  )
)

| Metric | Value |
|---|---|
| Sample Size | 7133 adults |
| Age | Weight | Height | BMI | BPSysAve | BPDiaAve | Pulse | PhysActive | SleepHrsNight |
|---|---|---|---|---|---|---|---|---|
| 34 | 87.4 | 164.7 | 32.2 | 113 | 85 | 70 | No | 4 |
| 34 | 87.4 | 164.7 | 32.2 | 113 | 85 | 70 | No | 4 |
| 34 | 87.4 | 164.7 | 32.2 | 113 | 85 | 70 | No | 4 |
| 49 | 86.7 | 168.4 | 30.6 | 112 | 75 | 86 | No | 8 |
| 45 | 75.7 | 166.7 | 27.2 | 118 | 64 | 62 | Yes | 8 |
| 45 | 75.7 | 166.7 | 27.2 | 118 | 64 | 62 | Yes | 8 |
| 45 | 75.7 | 166.7 | 27.2 | 118 | 64 | 62 | Yes | 8 |
| 66 | 68.0 | 169.5 | 23.7 | 111 | 63 | 60 | Yes | 7 |
Dataset Description:
- Age: Age in years
- Weight: Weight in kg
- BMI: Body Mass Index (kg/m²)
- BPSysAve: Average systolic blood pressure (mmHg)
- BPDiaAve: Average diastolic blood pressure (mmHg)
- Pulse: 60 second pulse rate
- SleepHrsNight: Hours of sleep per night

Now it’s your turn to practice! Use the same NHANES dataset and follow the examples above.
Total Points: 25 points
Research Question: Is there a correlation between weight and height among US adults?
Your tasks:
cor.test() and display with tidy() (3 points)
# a. Scatterplot
# Weight plotted against height, with a fitted linear trend line and its
# confidence band drawn over semi-transparent points
ggplot(data = nhanes_adult, mapping = aes(x = Height, y = Weight)) +
  geom_point(color = "steelblue", alpha = 0.3) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  ggtitle("Height vs Weight", subtitle = "NHANES Data, Adults 18-80 years") +
  xlab("Height (cm)") +
  ylab("Weight (kgs)") +
  theme_minimal()
# b. Correlation test with tidy() display
# Calculate Pearson correlation
cor_height_weight <- cor.test(nhanes_adult$Height, nhanes_adult$Weight)
# Display results in clean table
# The p-value is far smaller than the 3-decimal display precision, so plain
# rounding would print a misleading "0". format.pval() reports anything below
# eps as "<0.001" instead.
tidy(cor_height_weight) %>%
  select(estimate, statistic, p.value, conf.low, conf.high) %>%
  mutate(p.value = format.pval(p.value, digits = 3, eps = 0.001)) %>%
  kable(
    digits = 3,
    col.names = c(
      "r", "t-statistic", "p-value", "95% CI Lower", "95% CI Upper"
    ),
    caption = "Pearson Correlation: Height and Weight"
  )

| r | t-statistic | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|
| 0.451 | 42.618 | <0.001 | 0.432 | 0.469 |
# Calculate r-squared
# cor.test() labels its estimate "cor"; unname() drops that label so that
# data.frame() does not pick it up as a row name in the table below.
r_squared_height_weight <- unname(cor_height_weight$estimate)^2
data.frame(
  Measure = c("r²", "Variance Explained"),
  Value = c(
    # Fixed notation with 4 decimals, so the value is never shown in
    # scientific notation once it is coerced to character
    sprintf("%.4f", r_squared_height_weight),
    paste0(round(r_squared_height_weight * 100, 2), "%")
  )
) %>%
  kable(caption = "Effect Size")

| Measure | Value |
|---|---|
| r² | 0.2030 |
| Variance Explained | 20.3% |
# c. Statistical significance
# p<0.001 suggests statistical significance
# d. r² and interpretation (write as comment)
# R-squared value is 0.203. This means that 20.3% of the variation in weight can be explained by height.
# Hypothesis Test:
# H₀: ρ = 0 (no correlation between height and weight in population)
# H₁: ρ ≠ 0 (correlation exists)
# α = 0.05
# Results:
# r = 0.451: Moderate positive correlation
# p < 0.001: Statistically significant (reject H₀)
# 95% CI [0.432, 0.469]: Doesn’t contain zero (confirms significance)
Research Question: What are the relationships among BMI, weight, and height?
Your tasks:
# a. Correlation matrix
# Keep only the three body-size variables
BMI_vars <- select(nhanes_adult, Weight, Height, BMI)
# Pairwise Pearson correlations between the three variables
BMI_matrix <- cor(BMI_vars, use = "complete.obs")
# Render the matrix as a table rounded to 3 decimals
kable(BMI_matrix, digits = 3, caption = "BMI Correlation Matrix")

| | Weight | Height | BMI |
|---|---|---|---|
| Weight | 1.000 | 0.451 | 0.880 |
| Height | 0.451 | 1.000 | -0.012 |
| BMI | 0.880 | -0.012 | 1.000 |
# b. Visualize with corrplot
# Lower-triangle circle plot on a blue-white-red palette, with each
# coefficient printed over its circle
corrplot(
  corr = BMI_matrix,
  method = "circle",
  type = "lower",
  col = colorRampPalette(c("#3498db", "white", "#e74c3c"))(200),
  addCoef.col = "black",
  number.cex = 0.7,
  tl.col = "black",
  tl.srt = 45,
  title = "BMI Correlations",
  mar = c(0, 0, 2, 0)
)
# c. Strongest correlation:
# Weight and BMI
# d. Explanation (write as comment)
# Weight and BMI have a correlation of r = 0.88, which is a strong positive correlation. The r-squared value of 0.774 suggests that weight accounts for 77.4% of the variation in BMI. This makes sense because BMI is calculated from weight and height.
Research Question: Is there a relationship between hours of sleep and age?
Your tasks:
tidy() (2 points)
# a. Scatterplot
# Nightly sleep hours plotted against age, with a fitted linear trend line
# and its confidence band drawn over semi-transparent points
ggplot(data = nhanes_adult, mapping = aes(x = Age, y = SleepHrsNight)) +
  geom_point(color = "steelblue", alpha = 0.3) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  ggtitle("Age vs Sleep", subtitle = "NHANES Data, Adults 18-80 years") +
  xlab("Age (years)") +
  ylab("Sleep (hours per night)") +
  theme_minimal()
# b. Correlation with tidy()
# Pearson correlation test between age and nightly sleep hours
cor_age_sleep <- cor.test(nhanes_adult$Age, nhanes_adult$SleepHrsNight)
# Tidy the test result, keep the reported statistics, and render them as a
# table rounded to 3 decimals
cor_age_sleep %>%
  tidy() %>%
  select(estimate, statistic, p.value, conf.low, conf.high) %>%
  kable(
    caption = "Pearson Correlation: Age and Sleep",
    col.names = c(
      "r", "t-statistic", "p-value", "95% CI Lower", "95% CI Upper"
    ),
    digits = 3
  )

| r | t-statistic | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|
| 0.023 | 1.904 | 0.057 | -0.001 | 0.046 |
# Calculate r-squared
# cor.test() labels its estimate "cor"; unname() drops that label so that
# data.frame() does not pick it up as a row name in the table below.
r_squared_age_sleep <- unname(cor_age_sleep$estimate)^2
data.frame(
  Measure = c("r²", "Variance Explained"),
  Value = c(
    # Fixed notation with 4 decimals: a rounded value this small would
    # otherwise be coerced to character as "5e-04"
    sprintf("%.4f", r_squared_age_sleep),
    paste0(round(r_squared_age_sleep * 100, 2), "%")
  )
) %>%
  kable(caption = "Effect Size")

| Measure | Value |
|---|---|
| r² | 0.0005 |
| Variance Explained | 0.05% |