summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
plot(pressure)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.6
## ✔ ggplot2 4.0.2 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
## Warning: package 'haven' was built under R version 4.5.2
library(janitor)
## Warning: package 'janitor' was built under R version 4.5.2
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.2
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(broom)
library(ggeffects)
## Warning: package 'ggeffects' was built under R version 4.5.2
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.2
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.2
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.2
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.2
## corrplot 0.95 loaded
library(flextable)
## Warning: package 'flextable' was built under R version 4.5.3
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
##
## The following objects are masked from 'package:plotly':
##
## highlight, style
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
## The following object is masked from 'package:purrr':
##
## compose
library(officer)
## Warning: package 'officer' was built under R version 4.5.2
library(scales)
## Warning: package 'scales' was built under R version 4.5.2
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggplot2)
library(gtsummary)
library(gt)
## Warning: package 'gt' was built under R version 4.5.2
library(tableone)
## Warning: package 'tableone' was built under R version 4.5.2
library(survey)
## Warning: package 'survey' was built under R version 4.5.2
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.5.3
## ResourceSelection 0.3-6 2023-06-27
demo <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\DEMO_L.xpt") %>% clean_names()
bpx <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BPXO_L.xpt") %>% clean_names()
hypertension <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BPQ_L.xpt") %>% clean_names()
sleep <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\SLQ_L.xpt") %>% clean_names()
bmi <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BMX_L.xpt") %>% clean_names()
diabetes <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\DIQ_L.xpt") %>% clean_names()
smoking <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\SMQ_L.xpt") %>% clean_names()
names(demo)
## [1] "seqn" "sddsrvyr" "ridstatr" "riagendr" "ridageyr" "ridagemn"
## [7] "ridreth1" "ridreth3" "ridexmon" "ridexagm" "dmqmiliz" "dmdborn4"
## [13] "dmdyrusr" "dmdeduc2" "dmdmartz" "ridexprg" "dmdhhsiz" "dmdhrgnd"
## [19] "dmdhragz" "dmdhredz" "dmdhrmaz" "dmdhsedz" "wtint2yr" "wtmec2yr"
## [25] "sdmvstra" "sdmvpsu" "indfmpir"
names(bpx)
## [1] "seqn" "bpaoarm" "bpaocsz" "bpxosy1" "bpxodi1" "bpxosy2"
## [7] "bpxodi2" "bpxosy3" "bpxodi3" "bpxopls1" "bpxopls2" "bpxopls3"
names(hypertension)
## [1] "seqn" "bpq020" "bpq030" "bpq150" "bpq080" "bpq101d"
names(sleep)
## [1] "seqn" "slq300" "slq310" "sld012" "slq320" "slq330" "sld013"
names(bmi)
## [1] "seqn" "bmdstats" "bmxwt" "bmiwt" "bmxrecum" "bmirecum"
## [7] "bmxhead" "bmihead" "bmxht" "bmiht" "bmxbmi" "bmdbmic"
## [13] "bmxleg" "bmileg" "bmxarml" "bmiarml" "bmxarmc" "bmiarmc"
## [19] "bmxwaist" "bmiwaist" "bmxhip" "bmihip"
names(diabetes)
## [1] "seqn" "diq010" "did040" "diq160" "diq180" "diq050" "did060"
## [8] "diq060u" "diq070"
names(smoking)
## [1] "seqn" "smq020" "smq040" "smd641" "smd650" "smd100mn" "smq621"
## [8] "smd630" "smaquex2"
nhanes <- demo %>%
left_join(bpx, by = "seqn") %>%
left_join(hypertension, by = "seqn") %>%
left_join(sleep, by = "seqn") %>%
left_join(bmi, by = "seqn") %>%
left_join(diabetes, by = "seqn") %>%
left_join(smoking, by = "seqn")
missing_summary <- data.frame(
Variable = names(nhanes),
Missing_Count = colSums(is.na(nhanes)),
Missing_Percent = round(colSums(is.na(nhanes)) / nrow(nhanes) * 100, 2)
)
print(missing_summary[order(-missing_summary$Missing_Count), ][1:15, ])
## Variable Missing_Count Missing_Percent
## bmihead bmihead 11933 100.00
## bmirecum bmirecum 11915 99.85
## smd630 smd630 11910 99.81
## bmxhead bmxhead 11863 99.41
## bmiht bmiht 11799 98.88
## bmiarml bmiarml 11733 98.32
## bmiarmc bmiarmc 11728 98.28
## smd641 smd641 11660 97.71
## diq060u diq060u 11601 97.22
## did060 did060 11590 97.13
## bmiwt bmiwt 11588 97.11
## bmiwaist bmiwaist 11586 97.09
## bmihip bmihip 11572 96.97
## ridagemn ridagemn 11556 96.84
## bmileg bmileg 11537 96.68
nhanes <- nhanes %>%
filter(ridageyr >= 18)
nhanes <- nhanes %>%
mutate(
# Mean BP from readings 2 & 3 (standard NHANES protocol)
mean_sbp = rowMeans(cbind(bpxosy2, bpxosy3), na.rm = TRUE),
mean_dbp = rowMeans(cbind(bpxodi2, bpxodi3), na.rm = TRUE),
# Fall back to reading 1 if 2 & 3 missing
mean_sbp = ifelse(is.nan(mean_sbp), bpxosy1, mean_sbp),
mean_dbp = ifelse(is.nan(mean_dbp), bpxodi1, mean_dbp),
# BP medication
bp_meds = if_else(bpq150 == 1, 1L, 0L, missing = 0L),
hypertension = case_when(
mean_sbp >= 130 ~ 1,
mean_dbp >= 80 ~ 1,
bpq150 == 1 ~ 1,
mean_sbp < 130 & mean_dbp < 80 & (bpq150 == 2 | is.na(bpq150)) ~ 0,
TRUE ~ NA_real_
),
hypertension = factor(hypertension, levels = c(0,1), labels = c("No","Yes"))
)
# Check
table(nhanes$hypertension, useNA = "always")
##
## No Yes <NA>
## 2882 3824 1447
nhanes <- nhanes %>%
mutate(
# Sleep Duration
sld012 = ifelse(sld012 %in% c(77, 99), NA, sld012),
sld013 = ifelse(sld013 %in% c(77, 99), NA, sld013),
sleep_hrs = ((sld012 * 5) + (sld013 * 2)) / 7,
sleep_cat = case_when(
sleep_hrs < 7 ~ "Short (<7h)",
sleep_hrs >= 7 & sleep_hrs <= 9 ~ "Normal (7-9h)",
sleep_hrs > 9 ~ "Long (>9h)",
TRUE ~ NA_character_
),
sleep_cat = factor(
sleep_cat,
levels = c("Normal (7-9h)", "Short (<7h)", "Long (>9h)")
),
# Education (SES Proxy)
educ_cat = case_when(
dmdeduc2 == 1 ~ "<High School",
dmdeduc2 == 2 ~ "High School/GED",
dmdeduc2 == 3 ~ "Some College",
dmdeduc2 %in% c(4,5) ~ "College+",
TRUE ~ NA_character_
),
educ_cat = factor(
educ_cat,
levels = c("College+", "<High School", "High School/GED", "Some College")
),
# Covariates
age = ridageyr,
sex = factor(
if_else(riagendr == 1, "Male", "Female"),
levels = c("Female", "Male")
),
race_eth = case_when(
ridreth3 == 3 ~ "Non-Hispanic White",
ridreth3 == 4 ~ "Non-Hispanic Black",
ridreth3 == 1 ~ "Mexican American",
ridreth3 == 2 ~ "Other Hispanic",
ridreth3 == 6 ~ "Non-Hispanic Asian",
ridreth3 == 7 ~ "Other/Multiracial",
TRUE ~ NA_character_
),
race_eth = factor(
race_eth,
levels = c(
"Non-Hispanic White",
"Non-Hispanic Black",
"Mexican American",
"Other Hispanic",
"Non-Hispanic Asian",
"Other/Multiracial"
)
),
bmi = bmxbmi,
smoke_cat = case_when(
smq020 == 2 ~ "Never",
smq020 == 1 & smq040 %in% c(1,2) ~ "Current",
smq020 == 1 & (smq040 == 3 | is.na(smq040)) ~ "Former",
TRUE ~ NA_character_
),
smoke_cat = factor(
smoke_cat,
levels = c("Never", "Former", "Current"))
)
nhanes <- nhanes %>%
mutate(
# Diabetes
diabetes = case_when(
diq010 == 1 ~ "Yes",
diq010 == 2 ~ "No",
TRUE ~ NA_character_
),
diabetes = factor(diabetes, levels = c("No", "Yes"))
)
# Check
table(nhanes$diabetes, useNA = "always")
##
## No Yes <NA>
## 6803 1073 277
analytic_vars <- c("seqn", "hypertension", "sleep_cat", "sleep_hrs", "educ_cat",
"age", "sex", "race_eth", "bmi", "smoke_cat", "diabetes",
"mean_sbp", "mean_dbp", "bp_meds", "wtmec2yr", "sdmvpsu", "sdmvstra")
nhanes_analytic <- nhanes %>%
select(all_of(analytic_vars)) %>%
filter(
!is.na(hypertension),
!is.na(sleep_hrs),
!is.na(sleep_cat),
!is.na(educ_cat),
!is.na(age),
!is.na(sex),
!is.na(race_eth),
!is.na(bmi),
!is.na(smoke_cat),
!is.na(diabetes)
)
cat("Analytic sample N =", nrow(nhanes_analytic), "\n")
## Analytic sample N = 5566
cat("Hypertension prevalence:", round(mean(nhanes_analytic$hypertension == "Yes") * 100, 1), "%\n")
## Hypertension prevalence: 54.6 %
nhanes_analytic$hypertension <- relevel(nhanes_analytic$hypertension, ref = "No")
# NHANES 2021-2023 uses standard 2-year weights
svy_design <- svydesign(
id = ~sdmvpsu,
strata = ~sdmvstra,
weights = ~wtmec2yr,
data = nhanes_analytic,
nest = TRUE
)
tbl1_vars <- c("age", "sex", "race_eth", "educ_cat", "bmi",
"smoke_cat", "diabetes", "hypertension", "mean_sbp", "mean_dbp")
cat_vars <- c("sex", "race_eth", "educ_cat", "smoke_cat", "diabetes", "hypertension")
tbl1 <- CreateTableOne(
vars = tbl1_vars,
strata = "sleep_cat",
data = nhanes_analytic,
factorVars = cat_vars,
addOverall = TRUE,
test = TRUE
)
print(tbl1, showAllLevels = TRUE, formatOptions = list(big.mark = ","))
## Stratified by sleep_cat
## level Overall Normal (7-9h)
## n 5,566 3,635
## age (mean (SD)) 53.61 (17.15) 53.50 (17.13)
## sex (%) Female 3074 (55.2) 2003 (55.1)
## Male 2492 (44.8) 1632 (44.9)
## race_eth (%) Non-Hispanic White 3332 (59.9) 2322 (63.9)
## Non-Hispanic Black 663 (11.9) 340 ( 9.4)
## Mexican American 365 ( 6.6) 226 ( 6.2)
## Other Hispanic 550 ( 9.9) 335 ( 9.2)
## Non-Hispanic Asian 296 ( 5.3) 202 ( 5.6)
## Other/Multiracial 360 ( 6.5) 210 ( 5.8)
## educ_cat (%) College+ 3727 (67.0) 2600 (71.5)
## <High School 250 ( 4.5) 123 ( 3.4)
## High School/GED 424 ( 7.6) 222 ( 6.1)
## Some College 1165 (20.9) 690 (19.0)
## bmi (mean (SD)) 29.71 (7.22) 29.36 (7.01)
## smoke_cat (%) Never 3288 (59.1) 2249 (61.9)
## Former 1457 (26.2) 947 (26.1)
## Current 821 (14.8) 439 (12.1)
## diabetes (%) No 4747 (85.3) 3163 (87.0)
## Yes 819 (14.7) 472 (13.0)
## hypertension (%) No 2525 (45.4) 1732 (47.6)
## Yes 3041 (54.6) 1903 (52.4)
## mean_sbp (mean (SD)) 122.64 (18.03) 122.12 (17.46)
## mean_dbp (mean (SD)) 74.48 (10.96) 74.15 (10.70)
## Stratified by sleep_cat
## Short (<7h) Long (>9h) p test
## n 1,120 811
## age (mean (SD)) 53.76 (15.82) 53.90 (18.89) 0.795
## sex (%) 576 (51.4) 495 (61.0) <0.001
## 544 (48.6) 316 (39.0)
## race_eth (%) 583 (52.1) 427 (52.7) <0.001
## 196 (17.5) 127 (15.7)
## 67 ( 6.0) 72 ( 8.9)
## 120 (10.7) 95 (11.7)
## 64 ( 5.7) 30 ( 3.7)
## 90 ( 8.0) 60 ( 7.4)
## educ_cat (%) 705 (62.9) 422 (52.0) <0.001
## 55 ( 4.9) 72 ( 8.9)
## 98 ( 8.8) 104 (12.8)
## 262 (23.4) 213 (26.3)
## bmi (mean (SD)) 30.65 (7.52) 30.00 (7.63) <0.001
## smoke_cat (%) 581 (51.9) 458 (56.5) <0.001
## 300 (26.8) 210 (25.9)
## 239 (21.3) 143 (17.6)
## diabetes (%) 932 (83.2) 652 (80.4) <0.001
## 188 (16.8) 159 (19.6)
## hypertension (%) 465 (41.5) 328 (40.4) <0.001
## 655 (58.5) 483 (59.6)
## mean_sbp (mean (SD)) 123.66 (18.30) 123.58 (20.02) 0.013
## mean_dbp (mean (SD)) 75.70 (11.48) 74.27 (11.28) <0.001
# Table 1 with gtsummary
tbl1_pub <- nhanes_analytic %>%
select(all_of(tbl1_vars), sleep_cat) %>%
tbl_summary(
by = sleep_cat,
label = list(
age ~ "Age, years",
sex ~ "Sex",
race_eth ~ "Race/Ethnicity",
educ_cat ~ "Education Level",
bmi ~ "BMI (kg/m²)",
smoke_cat ~ "Smoking Status",
diabetes ~ "Diabetes",
hypertension ~ "Hypertension",
mean_sbp ~ "Systolic BP (mmHg)",
mean_dbp ~ "Diastolic BP (mmHg)"
),
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) %>%
add_overall() %>%
add_p(test = list(all_continuous() ~ "aov", all_categorical() ~ "chisq.test")) %>%
bold_p(t = 0.05) %>%
modify_spanning_header(c("stat_1","stat_2","stat_3") ~ "**Sleep Duration Category**") %>%
modify_caption("**Table 1.** Participant Characteristics by Sleep Duration (NHANES 2021-2023, N = {N})")
## The following warnings were returned during `modify_caption()`:
## ! For variable `age` (`sleep_cat`) and "statistic" and "p.value" statistics:
## The test "aov" in `add_p(test)` was deprecated in gtsummary 2.0.0. ℹ The same
## functionality is covered in "oneway.test" with argument `var.equal = TRUE`.
## ! For variable `bmi` (`sleep_cat`) and "statistic" and "p.value" statistics:
## The test "aov" in `add_p(test)` was deprecated in gtsummary 2.0.0. ℹ The same
## functionality is covered in "oneway.test" with argument `var.equal = TRUE`.
## ! For variable `mean_dbp` (`sleep_cat`) and "statistic" and "p.value"
## statistics: The test "aov" in `add_p(test)` was deprecated in gtsummary
## 2.0.0. ℹ The same functionality is covered in "oneway.test" with argument
## `var.equal = TRUE`.
## ! For variable `mean_sbp` (`sleep_cat`) and "statistic" and "p.value"
## statistics: The test "aov" in `add_p(test)` was deprecated in gtsummary
## 2.0.0. ℹ The same functionality is covered in "oneway.test" with argument
## `var.equal = TRUE`.
tbl1_pub
| Characteristic | Overall N = 5,5661 |
Sleep Duration Category
|
p-value2 | ||
|---|---|---|---|---|---|
| Normal (7-9h) N = 3,6351 |
Short (<7h) N = 1,1201 |
Long (>9h) N = 8111 |
|||
| Age, years | 53.6 (17.1) | 53.5 (17.1) | 53.8 (15.8) | 53.9 (18.9) | 0.8 |
| Sex | <0.001 | ||||
| Female | 3,074 (55%) | 2,003 (55%) | 576 (51%) | 495 (61%) | |
| Male | 2,492 (45%) | 1,632 (45%) | 544 (49%) | 316 (39%) | |
| Race/Ethnicity | <0.001 | ||||
| Non-Hispanic White | 3,332 (60%) | 2,322 (64%) | 583 (52%) | 427 (53%) | |
| Non-Hispanic Black | 663 (12%) | 340 (9.4%) | 196 (18%) | 127 (16%) | |
| Mexican American | 365 (6.6%) | 226 (6.2%) | 67 (6.0%) | 72 (8.9%) | |
| Other Hispanic | 550 (9.9%) | 335 (9.2%) | 120 (11%) | 95 (12%) | |
| Non-Hispanic Asian | 296 (5.3%) | 202 (5.6%) | 64 (5.7%) | 30 (3.7%) | |
| Other/Multiracial | 360 (6.5%) | 210 (5.8%) | 90 (8.0%) | 60 (7.4%) | |
| Education Level | <0.001 | ||||
| College+ | 3,727 (67%) | 2,600 (72%) | 705 (63%) | 422 (52%) | |
| <High School | 250 (4.5%) | 123 (3.4%) | 55 (4.9%) | 72 (8.9%) | |
| High School/GED | 424 (7.6%) | 222 (6.1%) | 98 (8.8%) | 104 (13%) | |
| Some College | 1,165 (21%) | 690 (19%) | 262 (23%) | 213 (26%) | |
| BMI (kg/m²) | 29.7 (7.2) | 29.4 (7.0) | 30.7 (7.5) | 30.0 (7.6) | <0.001 |
| Smoking Status | <0.001 | ||||
| Never | 3,288 (59%) | 2,249 (62%) | 581 (52%) | 458 (56%) | |
| Former | 1,457 (26%) | 947 (26%) | 300 (27%) | 210 (26%) | |
| Current | 821 (15%) | 439 (12%) | 239 (21%) | 143 (18%) | |
| Diabetes | 819 (15%) | 472 (13%) | 188 (17%) | 159 (20%) | <0.001 |
| Hypertension | 3,041 (55%) | 1,903 (52%) | 655 (58%) | 483 (60%) | <0.001 |
| Systolic BP (mmHg) | 122.6 (18.0) | 122.1 (17.5) | 123.7 (18.3) | 123.6 (20.0) | 0.013 |
| Diastolic BP (mmHg) | 74.5 (11.0) | 74.2 (10.7) | 75.7 (11.5) | 74.3 (11.3) | <0.001 |
| 1 Mean (SD); n (%) | |||||
| 2 One-way analysis of means; Pearson’s Chi-squared test | |||||
#Distribution of Systolic Blood Pressure (Outcome Variable)
ggplot(nhanes_analytic %>% filter(!is.na(mean_sbp)),
aes(x = mean_sbp)) +
geom_histogram(aes(y = after_stat(density)), # ← updated notation
bins = 40, fill = '#2E5496', alpha = 0.7) +
geom_density(color = 'red', linewidth = 1) +
geom_vline(xintercept = 130, linetype = 'dashed', color = 'darkred') +
annotate("text", x = 133, y = 0.025, # ← optional: label the line
label = "130 mmHg\nthreshold",
color = "darkred", size = 3, hjust = 0) +
labs(
title = "Figure 1. Distribution of Systolic Blood Pressure",
subtitle = "NHANES 2021–2023 (N = 5,661)",
x = "Mean Systolic BP (mmHg)",
y = "Density"
) +
theme_minimal()
#Hypertension Prevalence by Sleep Duration
fig2_data <- nhanes_analytic %>%
group_by(sleep_cat, hypertension) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(sleep_cat) %>%
mutate(prop = n / sum(n)) %>%
filter(as.character(hypertension) == "Yes") # ← fix factor comparison
fig2 <- ggplot(fig2_data, aes(x = sleep_cat, y = prop, fill = sleep_cat)) +
geom_col(width = 0.6, alpha = 0.85) +
geom_text(aes(label = sprintf("%.1f%%", prop * 100)),
vjust = -0.4, size = 3.5, fontface = "bold") +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0, max(fig2_data$prop) * 1.15)
) +
scale_fill_manual(values = c("#1565C0", "#D32F2F", "#388E3C")) +
labs(
title = "Figure 2. Hypertension Prevalence by Sleep Duration",
subtitle = "NHANES 2021–2023",
x = "Sleep Duration Category",
y = "Proportion Hypertensive (%)"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "none",
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
print(fig2)
ggsave("fig2_hypertension_by_sleep.png", fig2, width = 7, height = 5, dpi = 300)
#Hypertension Prevalence by Education Level and Sleep Duration
nhanes_analytic %>%
group_by(educ_cat, sleep_cat, hypertension) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(educ_cat, sleep_cat) %>%
mutate(prop = n / sum(n)) %>%
filter(as.character(hypertension) == "Yes") %>% # ← fix factor comparison
ggplot(aes(x = educ_cat, y = prop, fill = sleep_cat)) +
geom_col(position = "dodge", alpha = 0.85) +
scale_fill_manual(values = c(
"Normal (7-9h)" = "#1565C0",
"Short (<7h)" = "#D32F2F",
"Long (>9h)" = "#388E3C"
)) +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0, 0.9)
) +
geom_text(aes(label = scales::percent(prop, accuracy = 1)),
position = position_dodge(0.9),
vjust = -0.4, size = 3, fontface = "bold") +
labs(
title = "Figure 3. Hypertension Prevalence by Education and Sleep Duration",
subtitle = "NHANES 2021–2023",
x = "Education Level",
y = "Proportion Hypertensive (%)",
fill = "Sleep Duration"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 35, hjust = 1),
legend.position = "bottom",
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
#Regression Analysis # Model 1: Crude
model_crude <- glm(
hypertension~ sleep_cat,
data = nhanes_analytic,
family = binomial(link = "logit")
)
# Model 2: Adjusted
model_adj <- glm(
hypertension ~ sleep_cat + educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
data = nhanes_analytic,
family = binomial(link = "logit")
)
model_crude <- glm(
hypertension ~ sleep_cat,
data = nhanes_analytic,
family = binomial(link = "logit")
)
crude_tbl <- tidy(model_crude, exponentiate = TRUE, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(
term = dplyr::recode(term,
"sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Long sleep (>9h) vs. Normal"),
OR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
p_fmt = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
) %>%
select(term, OR_CI, p_fmt)
cat("\n=== Crude Logistic Regression ===\n")
##
## === Crude Logistic Regression ===
print(crude_tbl)
## # A tibble: 2 × 3
## term OR_CI p_fmt
## <chr> <chr> <chr>
## 1 Short sleep (<7h) vs. Normal 1.28 (1.12–1.47) <0.001
## 2 Long sleep (>9h) vs. Normal 1.34 (1.15–1.57) <0.001
tbl_crude <- model_crude %>%
tbl_regression(
exponentiate = TRUE,
label = list(sleep_cat ~ "Sleep Duration")
) %>%
modify_header(estimate = "**Crude OR**")
tbl_adj <- model_adj %>%
tbl_regression(
exponentiate = TRUE,
label = list(
sleep_cat ~ "Sleep Duration",
educ_cat ~ "Education Level",
age ~ "Age (years)",
sex ~ "Sex",
race_eth ~ "Race/Ethnicity",
bmi ~ "BMI (kg/m²)",
smoke_cat ~ "Smoking Status",
diabetes ~ "Diabetes"
)
) %>%
modify_header(estimate = "**Adjusted OR**")
tbl_merge(
list(tbl_crude, tbl_adj),
tab_spanner = c("**Crude Model**", "**Adjusted Model**")
) %>%
modify_caption("**Table 2.** Logistic Regression: Sleep Duration and Hypertension Risk (NHANES 2021–2023)")
## The number rows in the tables to be merged do not match, which may result in
## rows appearing out of order.
## ℹ See `tbl_merge()` (`?gtsummary::tbl_merge()`) help file for details. Use
## `quiet=TRUE` to silence message.
| Characteristic |
Crude Model
|
Adjusted Model
|
||||
|---|---|---|---|---|---|---|
| Crude OR | 95% CI | p-value | Adjusted OR | 95% CI | p-value | |
| Sleep Duration | ||||||
| Normal (7-9h) | — | — | — | — | ||
| Short (<7h) | 1.28 | 1.12, 1.47 | <0.001 | 1.07 | 0.92, 1.25 | 0.4 |
| Long (>9h) | 1.34 | 1.15, 1.57 | <0.001 | 1.22 | 1.02, 1.47 | 0.032 |
| Education Level | ||||||
| College+ | — | — | ||||
| <High School | 1.35 | 0.99, 1.87 | 0.063 | |||
| High School/GED | 1.34 | 1.05, 1.72 | 0.021 | |||
| Some College | 1.17 | 1.00, 1.37 | 0.046 | |||
| Age (years) | 1.05 | 1.05, 1.06 | <0.001 | |||
| Sex | ||||||
| Female | — | — | ||||
| Male | 1.48 | 1.30, 1.67 | <0.001 | |||
| Race/Ethnicity | ||||||
| Non-Hispanic White | — | — | ||||
| Non-Hispanic Black | 1.78 | 1.45, 2.18 | <0.001 | |||
| Mexican American | 0.89 | 0.68, 1.15 | 0.4 | |||
| Other Hispanic | 0.89 | 0.72, 1.11 | 0.3 | |||
| Non-Hispanic Asian | 1.64 | 1.25, 2.17 | <0.001 | |||
| Other/Multiracial | 1.12 | 0.87, 1.44 | 0.4 | |||
| BMI (kg/m²) | 1.09 | 1.08, 1.10 | <0.001 | |||
| Smoking Status | ||||||
| Never | — | — | ||||
| Former | 1.08 | 0.94, 1.25 | 0.3 | |||
| Current | 1.23 | 1.02, 1.47 | 0.028 | |||
| Diabetes | ||||||
| No | — | — | ||||
| Yes | 1.67 | 1.37, 2.03 | <0.001 | |||
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | ||||||
model_adj <- glm(
hypertension ~ sleep_cat + educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
data = nhanes_analytic,
family = binomial(link = "logit")
)
adj_tbl <- tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(
term = dplyr::recode(term,
"sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Long sleep (>9h) vs. Normal",
"educ_cat<High School" = "Education: <High School vs. College+",
"educ_catHigh School/GED" = "Education: High School/GED vs. College+",
"educ_catSome College" = "Education: Some College vs. College+",
"sexMale" = "Sex: Male vs. Female",
"race_ethNon-Hispanic Black" = "Race: Non-Hispanic Black vs. White",
"race_ethMexican American" = "Race: Mexican American vs. White",
"race_ethOther Hispanic" = "Race: Other Hispanic vs. White",
"race_ethNon-Hispanic Asian" = "Race: Non-Hispanic Asian vs. White",
"race_ethOther/Multiracial" = "Race: Other/Multiracial vs. White",
"smoke_catFormer" = "Smoking: Former vs. Never",
"smoke_catCurrent" = "Smoking: Current vs. Never"
),
aOR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
p_fmt = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
) %>%
select(term, aOR_CI, p_fmt)
cat("\n=== Adjusted Logistic Regression ===\n")
##
## === Adjusted Logistic Regression ===
print(adj_tbl, n = 30)
## # A tibble: 16 × 3
## term aOR_CI p_fmt
## <chr> <chr> <chr>
## 1 Short sleep (<7h) vs. Normal 1.07 (0.92–1.25) 0.376
## 2 Long sleep (>9h) vs. Normal 1.22 (1.02–1.47) 0.032
## 3 Education: <High School vs. College+ 1.35 (0.99–1.87) 0.063
## 4 Education: High School/GED vs. College+ 1.34 (1.05–1.72) 0.021
## 5 Education: Some College vs. College+ 1.17 (1.00–1.37) 0.046
## 6 age 1.05 (1.05–1.06) <0.001
## 7 Sex: Male vs. Female 1.48 (1.30–1.67) <0.001
## 8 Race: Non-Hispanic Black vs. White 1.78 (1.45–2.18) <0.001
## 9 Race: Mexican American vs. White 0.89 (0.68–1.15) 0.369
## 10 Race: Other Hispanic vs. White 0.89 (0.72–1.11) 0.302
## 11 Race: Non-Hispanic Asian vs. White 1.64 (1.25–2.17) <0.001
## 12 Race: Other/Multiracial vs. White 1.12 (0.87–1.44) 0.381
## 13 bmi 1.09 (1.08–1.10) <0.001
## 14 Smoking: Former vs. Never 1.08 (0.94–1.25) 0.288
## 15 Smoking: Current vs. Never 1.23 (1.02–1.47) 0.028
## 16 diabetesYes 1.67 (1.37–2.03) <0.001
model_int <- glm(
hypertension ~ sleep_cat * educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
data = nhanes_analytic,
family = binomial(link = "logit")
)
# Likelihood ratio test for interaction
lrt <- anova(model_adj, model_int, test = "LRT")
cat("\n=== Likelihood Ratio Test for Interaction ===\n")
##
## === Likelihood Ratio Test for Interaction ===
print(lrt)
## Analysis of Deviance Table
##
## Model 1: hypertension ~ sleep_cat + educ_cat + age + sex + race_eth +
## bmi + smoke_cat + diabetes
## Model 2: hypertension ~ sleep_cat * educ_cat + age + sex + race_eth +
## bmi + smoke_cat + diabetes
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 5549 6234.6
## 2 5543 6232.3 6 2.327 0.8873
cat("Interaction p-value:", lrt$"Pr(>Chi)"[2], "\n")
## Interaction p-value: 0.8872993
cat("\n=== Stratified Analysis by Education ===\n")
##
## === Stratified Analysis by Education ===
educ_levels <- levels(nhanes_analytic$educ_cat)
strat_results <- lapply(educ_levels, function(edu) {
sub <- nhanes_analytic %>% filter(educ_cat == edu)
if (nrow(sub) < 50) return(NULL)
mod <- glm(
hypertension ~ sleep_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
data = sub, family = binomial(link = "logit")
)
tidy(mod, exponentiate = TRUE, conf.int = TRUE) %>%
filter(grepl("sleep_cat", term)) %>%
mutate(
education = edu,
n = nrow(sub),
term =dplyr:: recode(term,
"sleep_catShort (<7h)" = "Short (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Long (>9h) vs. Normal"),
aOR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
p_fmt = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
) %>%
select(education, n, term, estimate, conf.low, conf.high, aOR_CI, p_fmt, p.value)
})
strat_df <- bind_rows(strat_results)
cat("\nStratified aORs:\n")
##
## Stratified aORs:
print(strat_df %>% select(education, n, term, aOR_CI, p_fmt), n = 20)
## # A tibble: 8 × 5
## education n term aOR_CI p_fmt
## <chr> <int> <chr> <chr> <chr>
## 1 College+ 3727 Short (<7h) vs. Normal 1.13 (0.93–1.37) 0.209
## 2 College+ 3727 Long (>9h) vs. Normal 1.20 (0.94–1.52) 0.146
## 3 <High School 250 Short (<7h) vs. Normal 0.77 (0.35–1.67) 0.501
## 4 <High School 250 Long (>9h) vs. Normal 0.97 (0.45–2.10) 0.937
## 5 High School/GED 424 Short (<7h) vs. Normal 1.00 (0.56–1.82) 0.989
## 6 High School/GED 424 Long (>9h) vs. Normal 1.38 (0.78–2.51) 0.275
## 7 Some College 1165 Short (<7h) vs. Normal 1.01 (0.73–1.41) 0.952
## 8 Some College 1165 Long (>9h) vs. Normal 1.25 (0.86–1.82) 0.238
tbl_crude <- model_crude %>%
tbl_regression(
exponentiate = TRUE,
label = list(sleep_cat ~ "Sleep Duration"),
) %>%
modify_header(estimate = "**Crude OR**")
tbl_adj <- model_adj %>%
tbl_regression(
exponentiate = TRUE,
label = list(
sleep_cat ~ "Sleep Duration",
educ_cat ~ "Education Level",
age ~ "Age (years)",
sex ~ "Sex",
race_eth ~ "Race/Ethnicity",
bmi ~ "BMI (kg/m²)",
smoke_cat ~ "Smoking Status",
diabetes ~ "Diabetes"
)
) %>%
modify_header(estimate = "**Adjusted OR**")
tbl_merge <- tbl_merge(
list(tbl_crude, tbl_adj),
tab_spanner = c("**Crude Model**", "**Adjusted Model**")
) %>%
modify_caption("**Table 2.** Logistic Regression: Sleep Duration and Hypertension (NHANES 2021–2023)")
## The number rows in the tables to be merged do not match, which may result in
## rows appearing out of order.
## ℹ See `tbl_merge()` (`?gtsummary::tbl_merge()`) help file for details. Use
## `quiet=TRUE` to silence message.
print(tbl_merge)
## <div id="wgdivzjzhr" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
## <style>#wgdivzjzhr table {
## font-family: system-ui, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
## -webkit-font-smoothing: antialiased;
## -moz-osx-font-smoothing: grayscale;
## }
##
## #wgdivzjzhr thead, #wgdivzjzhr tbody, #wgdivzjzhr tfoot, #wgdivzjzhr tr, #wgdivzjzhr td, #wgdivzjzhr th {
## border-style: none;
## }
##
## #wgdivzjzhr p {
## margin: 0;
## padding: 0;
## }
##
## #wgdivzjzhr .gt_table {
## display: table;
## border-collapse: collapse;
## line-height: normal;
## margin-left: auto;
## margin-right: auto;
## color: #333333;
## font-size: 16px;
## font-weight: normal;
## font-style: normal;
## background-color: #FFFFFF;
## width: auto;
## border-top-style: solid;
## border-top-width: 2px;
## border-top-color: #A8A8A8;
## border-right-style: none;
## border-right-width: 2px;
## border-right-color: #D3D3D3;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #A8A8A8;
## border-left-style: none;
## border-left-width: 2px;
## border-left-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_caption {
## padding-top: 4px;
## padding-bottom: 4px;
## }
##
## #wgdivzjzhr .gt_title {
## color: #333333;
## font-size: 125%;
## font-weight: initial;
## padding-top: 4px;
## padding-bottom: 4px;
## padding-left: 5px;
## padding-right: 5px;
## border-bottom-color: #FFFFFF;
## border-bottom-width: 0;
## }
##
## #wgdivzjzhr .gt_subtitle {
## color: #333333;
## font-size: 85%;
## font-weight: initial;
## padding-top: 3px;
## padding-bottom: 5px;
## padding-left: 5px;
## padding-right: 5px;
## border-top-color: #FFFFFF;
## border-top-width: 0;
## }
##
## #wgdivzjzhr .gt_heading {
## background-color: #FFFFFF;
## text-align: center;
## border-bottom-color: #FFFFFF;
## border-left-style: none;
## border-left-width: 1px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 1px;
## border-right-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_bottom_border {
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_col_headings {
## border-top-style: solid;
## border-top-width: 2px;
## border-top-color: #D3D3D3;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## border-left-style: none;
## border-left-width: 1px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 1px;
## border-right-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_col_heading {
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: normal;
## text-transform: inherit;
## border-left-style: none;
## border-left-width: 1px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 1px;
## border-right-color: #D3D3D3;
## vertical-align: bottom;
## padding-top: 5px;
## padding-bottom: 6px;
## padding-left: 5px;
## padding-right: 5px;
## overflow-x: hidden;
## }
##
## #wgdivzjzhr .gt_column_spanner_outer {
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: normal;
## text-transform: inherit;
## padding-top: 0;
## padding-bottom: 0;
## padding-left: 4px;
## padding-right: 4px;
## }
##
## #wgdivzjzhr .gt_column_spanner_outer:first-child {
## padding-left: 0;
## }
##
## #wgdivzjzhr .gt_column_spanner_outer:last-child {
## padding-right: 0;
## }
##
## #wgdivzjzhr .gt_column_spanner {
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## vertical-align: bottom;
## padding-top: 5px;
## padding-bottom: 5px;
## overflow-x: hidden;
## display: inline-block;
## width: 100%;
## }
##
## #wgdivzjzhr .gt_spanner_row {
## border-bottom-style: hidden;
## }
##
## #wgdivzjzhr .gt_group_heading {
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: initial;
## text-transform: inherit;
## border-top-style: solid;
## border-top-width: 2px;
## border-top-color: #D3D3D3;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## border-left-style: none;
## border-left-width: 1px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 1px;
## border-right-color: #D3D3D3;
## vertical-align: middle;
## text-align: left;
## }
##
## #wgdivzjzhr .gt_empty_group_heading {
## padding: 0.5px;
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: initial;
## border-top-style: solid;
## border-top-width: 2px;
## border-top-color: #D3D3D3;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## vertical-align: middle;
## }
##
## #wgdivzjzhr .gt_from_md > :first-child {
## margin-top: 0;
## }
##
## #wgdivzjzhr .gt_from_md > :last-child {
## margin-bottom: 0;
## }
##
## #wgdivzjzhr .gt_row {
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## margin: 10px;
## border-top-style: solid;
## border-top-width: 1px;
## border-top-color: #D3D3D3;
## border-left-style: none;
## border-left-width: 1px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 1px;
## border-right-color: #D3D3D3;
## vertical-align: middle;
## overflow-x: hidden;
## }
##
## #wgdivzjzhr .gt_stub {
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: initial;
## text-transform: inherit;
## border-right-style: solid;
## border-right-width: 2px;
## border-right-color: #D3D3D3;
## padding-left: 5px;
## padding-right: 5px;
## }
##
## #wgdivzjzhr .gt_stub_row_group {
## color: #333333;
## background-color: #FFFFFF;
## font-size: 100%;
## font-weight: initial;
## text-transform: inherit;
## border-right-style: solid;
## border-right-width: 2px;
## border-right-color: #D3D3D3;
## padding-left: 5px;
## padding-right: 5px;
## vertical-align: top;
## }
##
## #wgdivzjzhr .gt_row_group_first td {
## border-top-width: 2px;
## }
##
## #wgdivzjzhr .gt_row_group_first th {
## border-top-width: 2px;
## }
##
## #wgdivzjzhr .gt_summary_row {
## color: #333333;
## background-color: #FFFFFF;
## text-transform: inherit;
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## }
##
## #wgdivzjzhr .gt_first_summary_row {
## border-top-style: solid;
## border-top-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_first_summary_row.thick {
## border-top-width: 2px;
## }
##
## #wgdivzjzhr .gt_last_summary_row {
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_grand_summary_row {
## color: #333333;
## background-color: #FFFFFF;
## text-transform: inherit;
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## }
##
## #wgdivzjzhr .gt_first_grand_summary_row {
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## border-top-style: double;
## border-top-width: 6px;
## border-top-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_last_grand_summary_row_top {
## padding-top: 8px;
## padding-bottom: 8px;
## padding-left: 5px;
## padding-right: 5px;
## border-bottom-style: double;
## border-bottom-width: 6px;
## border-bottom-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_striped {
## background-color: rgba(128, 128, 128, 0.05);
## }
##
## #wgdivzjzhr .gt_table_body {
## border-top-style: solid;
## border-top-width: 2px;
## border-top-color: #D3D3D3;
## border-bottom-style: solid;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_footnotes {
## color: #333333;
## background-color: #FFFFFF;
## border-bottom-style: none;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## border-left-style: none;
## border-left-width: 2px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 2px;
## border-right-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_footnote {
## margin: 0px;
## font-size: 90%;
## padding-top: 4px;
## padding-bottom: 4px;
## padding-left: 5px;
## padding-right: 5px;
## }
##
## #wgdivzjzhr .gt_sourcenotes {
## color: #333333;
## background-color: #FFFFFF;
## border-bottom-style: none;
## border-bottom-width: 2px;
## border-bottom-color: #D3D3D3;
## border-left-style: none;
## border-left-width: 2px;
## border-left-color: #D3D3D3;
## border-right-style: none;
## border-right-width: 2px;
## border-right-color: #D3D3D3;
## }
##
## #wgdivzjzhr .gt_sourcenote {
## font-size: 90%;
## padding-top: 4px;
## padding-bottom: 4px;
## padding-left: 5px;
## padding-right: 5px;
## }
##
## #wgdivzjzhr .gt_left {
## text-align: left;
## }
##
## #wgdivzjzhr .gt_center {
## text-align: center;
## }
##
## #wgdivzjzhr .gt_right {
## text-align: right;
## font-variant-numeric: tabular-nums;
## }
##
## #wgdivzjzhr .gt_font_normal {
## font-weight: normal;
## }
##
## #wgdivzjzhr .gt_font_bold {
## font-weight: bold;
## }
##
## #wgdivzjzhr .gt_font_italic {
## font-style: italic;
## }
##
## #wgdivzjzhr .gt_super {
## font-size: 65%;
## }
##
## #wgdivzjzhr .gt_footnote_marks {
## font-size: 75%;
## vertical-align: 0.4em;
## position: initial;
## }
##
## #wgdivzjzhr .gt_asterisk {
## font-size: 100%;
## vertical-align: 0;
## }
##
## #wgdivzjzhr .gt_indent_1 {
## text-indent: 5px;
## }
##
## #wgdivzjzhr .gt_indent_2 {
## text-indent: 10px;
## }
##
## #wgdivzjzhr .gt_indent_3 {
## text-indent: 15px;
## }
##
## #wgdivzjzhr .gt_indent_4 {
## text-indent: 20px;
## }
##
## #wgdivzjzhr .gt_indent_5 {
## text-indent: 25px;
## }
##
## #wgdivzjzhr .katex-display {
## display: inline-flex !important;
## margin-bottom: 0.75em !important;
## }
##
## #wgdivzjzhr div.Reactable > div.rt-table > div.rt-thead > div.rt-tr.rt-tr-group-header > div.rt-th-group:after {
## height: 0px !important;
## }
## </style>
## <table class="gt_table" data-quarto-disable-processing="false" data-quarto-bootstrap="false">
## <caption><span class='gt_from_md'><strong>Table 2.</strong> Logistic Regression: Sleep Duration and Hypertension (NHANES 2021–2023)</span></caption>
## <thead>
## <tr class="gt_col_headings gt_spanner_row">
## <th class="gt_col_heading gt_columns_bottom_border gt_left" rowspan="2" colspan="1" scope="col" id="label"><span class='gt_from_md'><strong>Characteristic</strong></span></th>
## <th class="gt_center gt_columns_top_border gt_column_spanner_outer" rowspan="1" colspan="3" scope="colgroup" id="level 1; estimate_1">
## <div class="gt_column_spanner"><span class='gt_from_md'><strong>Crude Model</strong></span></div>
## </th>
## <th class="gt_center gt_columns_top_border gt_column_spanner_outer" rowspan="1" colspan="3" scope="colgroup" id="level 1; estimate_2">
## <div class="gt_column_spanner"><span class='gt_from_md'><strong>Adjusted Model</strong></span></div>
## </th>
## </tr>
## <tr class="gt_col_headings">
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="estimate_1"><span class='gt_from_md'><strong>Crude OR</strong></span></th>
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="conf.low_1"><span class='gt_from_md'><strong>95% CI</strong></span></th>
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="p.value_1"><span class='gt_from_md'><strong>p-value</strong></span></th>
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="estimate_2"><span class='gt_from_md'><strong>Adjusted OR</strong></span></th>
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="conf.low_2"><span class='gt_from_md'><strong>95% CI</strong></span></th>
## <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="p.value_2"><span class='gt_from_md'><strong>p-value</strong></span></th>
## </tr>
## </thead>
## <tbody class="gt_table_body">
## <tr><td headers="label" class="gt_row gt_left">Sleep Duration</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Normal (7-9h)</td>
## <td headers="estimate_1" class="gt_row gt_center">—</td>
## <td headers="conf.low_1" class="gt_row gt_center">—</td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Short (<7h)</td>
## <td headers="estimate_1" class="gt_row gt_center">1.28</td>
## <td headers="conf.low_1" class="gt_row gt_center">1.12, 1.47</td>
## <td headers="p.value_1" class="gt_row gt_center"><0.001</td>
## <td headers="estimate_2" class="gt_row gt_center">1.07</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.92, 1.25</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Long (>9h)</td>
## <td headers="estimate_1" class="gt_row gt_center">1.34</td>
## <td headers="conf.low_1" class="gt_row gt_center">1.15, 1.57</td>
## <td headers="p.value_1" class="gt_row gt_center"><0.001</td>
## <td headers="estimate_2" class="gt_row gt_center">1.22</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.02, 1.47</td>
## <td headers="p.value_2" class="gt_row gt_center">0.032</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Education Level</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> College+</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> <High School</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.35</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.99, 1.87</td>
## <td headers="p.value_2" class="gt_row gt_center">0.063</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> High School/GED</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.34</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.05, 1.72</td>
## <td headers="p.value_2" class="gt_row gt_center">0.021</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Some College</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.17</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.00, 1.37</td>
## <td headers="p.value_2" class="gt_row gt_center">0.046</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Age (years)</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.05</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.05, 1.06</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Sex</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Female</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Male</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.48</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.30, 1.67</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Race/Ethnicity</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Non-Hispanic White</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Non-Hispanic Black</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.78</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.45, 2.18</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Mexican American</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">0.89</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.68, 1.15</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Other Hispanic</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">0.89</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.72, 1.11</td>
## <td headers="p.value_2" class="gt_row gt_center">0.3</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Non-Hispanic Asian</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.64</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.25, 2.17</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Other/Multiracial</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.12</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.87, 1.44</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
## <tr><td headers="label" class="gt_row gt_left">BMI (kg/m²)</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.09</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.08, 1.10</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Smoking Status</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Never</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Former</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.08</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.94, 1.25</td>
## <td headers="p.value_2" class="gt_row gt_center">0.3</td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Current</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.23</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.02, 1.47</td>
## <td headers="p.value_2" class="gt_row gt_center">0.028</td></tr>
## <tr><td headers="label" class="gt_row gt_left">Diabetes</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> No</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
## <tr><td headers="label" class="gt_row gt_left"> Yes</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.67</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.37, 2.03</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
## </tbody>
## <tfoot>
## <tr class="gt_sourcenotes">
## <td class="gt_sourcenote" colspan="7"><span class='gt_from_md'>Abbreviations: CI = Confidence Interval, OR = Odds Ratio</span></td>
## </tr>
## </tfoot>
## </table>
## </div>
fitted_vals <- fitted(model_adj)
dev_resids <- residuals(model_adj, type = "deviance")
pearson_resids <- residuals(model_adj, type = "pearson")
std_dev_resids <- rstandard(model_adj)
leverage_vals <- hatvalues(model_adj)
cooks_d <- cooks.distance(model_adj)
n <- nrow(nhanes_analytic)
p <- length(coef(model_adj))
diag_df <- data.frame(
fitted = fitted_vals,
dev_resid = dev_resids,
pearson_res = pearson_resids,
std_devresid = std_dev_resids,
leverage = leverage_vals,
cooks = cooks_d,
index = 1:n
)
ggplot(diag_df, aes(x = fitted, y = dev_resid)) +
geom_point(alpha = 0.25, size = 0.8, color = "#555555") +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
geom_smooth(method = "loess", se = FALSE, color = "red", linewidth = 0.9) +
labs(
title = "Figure D1. Deviance Residuals vs. Fitted Values",
x = "Fitted Probability",
y = "Deviance Residual"
) +
theme_minimal(base_size = 11)
## `geom_smooth()` using formula = 'y ~ x'
Figure D1. Deviance residuals plotted against fitted (predicted) probabilities from the adjusted logistic regression model. The red smoother line indicates the local average.
#Diagnostic Plot 2: Calibration — Predicted vs. Observed Probabilities by Decile # Hosmer-Lemeshow test
hl_test <- hoslem.test(
x = as.integer(nhanes_analytic$hypertension == "Yes"),
y = fitted_vals,
g = 10
)
hl_p <- round(hl_test$p.value, 3)
# Calibration data
calib_df <- data.frame(
obs = as.integer(nhanes_analytic$hypertension == "Yes"),
pred = fitted_vals
) %>%
mutate(decile = ntile(pred, 10)) %>%
group_by(decile) %>%
summarise(
mean_pred = mean(pred),
obs_prop = mean(obs),
n = n(),
.groups = "drop"
)
ggplot(calib_df, aes(x = mean_pred, y = obs_prop)) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray50") +
geom_point(size = 3.5, color = "#1565C0") +
geom_line(color = "#1565C0", alpha = 0.6) +
scale_x_continuous(labels = percent_format(), limits = c(0, 1)) +
scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
labs(
title = "Figure D2. Calibration Plot (Predicted vs. Observed by Decile)",
subtitle = paste0("Hosmer-Lemeshow test p = ", hl_p),
x = "Mean Predicted Probability",
y = "Observed Proportion Hypertensive"
) +
theme_minimal(base_size = 11)
Figure D2. Calibration plot showing mean predicted probability versus observed hypertension proportion within each decile of predicted probability. Points close to the diagonal line indicate good calibration.
#Diagnostic Plot 3: Cook’s Distance — Influential Observations
threshold <- 4 / n
ggplot(diag_df, aes(x = index, y = cooks)) +
geom_point(aes(color = cooks > threshold), size = 0.7, alpha = 0.5) +
geom_hline(yintercept = threshold, linetype = "dashed", color = "red") +
scale_color_manual(values = c("FALSE" = "#888888", "TRUE" = "#D32F2F"),
labels = c("Non-influential", paste0("Cook's D > 4/n (", round(threshold, 5), ")")),
name = "") +
annotate("text", x = n * 0.85, y = threshold * 1.3,
label = paste0("Threshold = 4/n = ", round(threshold, 5)),
size = 3, color = "red") +
labs(
title = "Figure D3. Cook's Distance — Influential Observations",
x = "Observation Index",
y = "Cook's Distance"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")
Figure D3. Cook’s distance for each observation in the analytic sample. Observations above the conventional threshold of 4/n are flagged as potentially influential.
n_influential <- sum(cooks_d > threshold)
leverage_threshold <- 2 * p / n
ggplot(diag_df, aes(x = leverage, y = std_devresid)) +
geom_point(alpha = 0.25, size = 0.7, color = "#555555") +
geom_hline(yintercept = c(-2, 2), linetype = "dashed", color = "#E91E63", alpha = 0.7) +
geom_vline(xintercept = leverage_threshold,
linetype = "dashed", color = "#1565C0", alpha = 0.7) +
geom_smooth(method = "loess", se = FALSE, color = "red",
linewidth = 0.8, alpha = 0.7) +
annotate("text", x = leverage_threshold + 0.001, y = 3.5,
label = "High leverage\nthreshold", color = "#1565C0",
size = 3, hjust = 0) +
labs(
title = "Figure D4. Leverage vs. Standardized Deviance Residuals",
x = "Leverage (Hat Value)",
y = "Standardized Deviance Residual"
) +
theme_minimal(base_size = 11)
## `geom_smooth()` using formula = 'y ~ x'
Figure D4. Standardized deviance residuals plotted against leverage (hat values). High leverage combined with large residuals identifies the most influential observations.
# Predicted probabilities from adjusted model using ggeffects
pred_sleep <- ggpredict(model_adj, terms = "sleep_cat")
ggplot(pred_sleep, aes(x = x, y = predicted)) +
geom_col(aes(fill = x), width = 0.55, alpha = 0.85) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
width = 0.12, linewidth = 0.9) +
geom_text(aes(label = sprintf("%.1f%%", predicted * 100)),
vjust = -0.6, size = 3.8, fontface = "bold") +
scale_fill_manual(values = c(
"Normal (7-9h)" = "#1565C0",
"Short (<7h)" = "#D32F2F",
"Long (>9h)" = "#388E3C"
)) +
scale_y_continuous(
labels = percent_format(accuracy = 1),
limits = c(0, max(pred_sleep$conf.high) * 1.2)
) +
labs(
title = "Figure 1. Adjusted Predicted Probability of Hypertension by Sleep Duration",
subtitle = "Adjusted for age, sex, race/ethnicity, BMI, education, smoking, and diabetes\n(NHANES 2021–2023)",
x = "Sleep Duration Category",
y = "Predicted Probability of Hypertension"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "none",
plot.title = element_text(face = "bold", size = 11),
panel.grid.minor = element_blank()
)
Figure 1. Adjusted predicted probabilities of hypertension by sleep duration category, holding all covariates at their mean (continuous) or reference (categorical) values. Estimates are derived from the adjusted logistic regression model. Error bars represent 95% confidence intervals.
#Extract all coefficients from adjusted model
coef_df <- tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(
term_label = dplyr::recode(term,
"sleep_catShort (<7h)" = "Sleep: Short (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Sleep: Long (>9h) vs. Normal",
"educ_cat<High School" = "Education: <High School vs. College+",
"educ_catHigh School/GED" = "Education: HS/GED vs. College+",
"educ_catSome College" = "Education: Some College vs. College+",
"age" = "Age (per year)",
"sexMale" = "Sex: Male vs. Female",
"race_ethNon-Hispanic Black" = "Race: Non-Hispanic Black vs. White",
"race_ethMexican American" = "Race: Mexican American vs. White",
"race_ethOther Hispanic" = "Race: Other Hispanic vs. White",
"race_ethNon-Hispanic Asian" = "Race: Non-Hispanic Asian vs. White",
"race_ethOther/Multiracial" = "Race: Other/Multiracial vs. White",
"bmi" = "BMI (per kg/m²)",
"smoke_catFormer" = "Smoking: Former vs. Never",
"smoke_catCurrent" = "Smoking: Current vs. Never",
"diabetesYes" = "Diabetes: Yes vs. No"
),
# Group for color coding
group = case_when(
grepl("^Sleep", term_label) ~ "Primary Exposure",
grepl("^Education",term_label) ~ "SES/Education",
grepl("^Age|^BMI", term_label) ~ "Continuous Covariates",
TRUE ~ "Other Covariates"
),
group = factor(group, levels = c("Primary Exposure", "SES/Education",
"Continuous Covariates", "Other Covariates")),
sig = ifelse(p.value < 0.05, "Significant (p<0.05)", "Non-significant"),
# Reorder: primary exposure at top
term_label = fct_reorder(term_label, estimate)
)
ggplot(coef_df, aes(x = estimate, y = term_label, color = group, shape = sig)) +
geom_vline(xintercept = 1, linetype = "dashed", color = "gray50", linewidth = 0.7) +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
height = 0.25, linewidth = 0.75) +
geom_point(size = 3) +
scale_color_manual(
values = c(
"Primary Exposure" = "#D32F2F",
"SES/Education" = "#7B1FA2",
"Continuous Covariates"= "#1565C0",
"Other Covariates" = "#388E3C"
),
name = "Predictor Group"
) +
scale_shape_manual(
values = c("Significant (p<0.05)" = 16, "Non-significant" = 1),
name = "Statistical Significance"
) +
scale_x_log10(breaks = c(0.5, 0.75, 1.0, 1.25, 1.5, 2.0, 3.0, 5.0)) +
labs(
title = "Forest Plot — Adjusted Odds Ratios",
subtitle = "All predictors from the adjusted logistic regression model",
x = "Adjusted Odds Ratio (95% CI) — log scale",
y = NULL
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "bottom",
legend.box = "vertical",
plot.title = element_text(face = "bold", size = 12),
panel.grid.minor = element_blank(),
axis.text.y = element_text(size = 10)
)
## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.
Figure 2. Adjusted odds ratios (aOR) and 95% confidence intervals for all predictors in the adjusted logistic regression model. The vertical dashed line at OR = 1.0 represents the null. Estimates to the right of the null indicate increased odds of hypertension; estimates to the left indicate decreased odds.
forest_data <- bind_rows(
crude_tbl %>% mutate(Model = "Crude"),
adj_tbl %>% filter(grepl("sleep", term, ignore.case = TRUE)) %>% mutate(Model = "Adjusted")
) %>%
bind_rows(
tidy(model_crude, exponentiate = TRUE, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(term = dplyr::recode(term,
"sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Long sleep (>9h) vs. Normal"),
Model = "Crude"),
tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
filter(grepl("sleep_cat", term)) %>%
mutate(term = dplyr:: recode(term,
"sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
"sleep_catLong (>9h)" = "Long sleep (>9h) vs. Normal"),
Model = "Adjusted")
) %>%
select(term, estimate, conf.low, conf.high, Model) %>%
distinct()
fig1 <- ggplot(forest_data, aes(x = estimate, y = term, color = Model, shape = Model)) +
geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
height = 0.2, linewidth = 0.8, position = position_dodge(0.4)) +
geom_point(size = 3.5, position = position_dodge(0.4)) +
scale_color_manual(values = c("Crude" = "#2196F3", "Adjusted" = "#E91E63")) +
scale_shape_manual(values = c("Crude" = 16, "Adjusted" = 17)) +
scale_x_continuous(breaks = seq(0.5, 2.5, 0.25), limits = c(0.5, 2.8)) +
labs(
title = "Association Between Sleep Duration and Hypertension",
subtitle = "NHANES 2021–2023 (N = 5,717)",
x = "Odds Ratio (95% CI)",
y = NULL,
color = "Model", shape = "Model"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
ggsave("fig1_forest_plot.pdf", fig1, width = 9, height = 5, dpi = 300)
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("fig1_forest_plot.png", fig1, width = 9, height = 5, dpi = 300)
## `height` was translated to `width`.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
strat_short <- strat_df %>%
filter(grepl("Short", term)) %>%
mutate(
education = factor(education,
levels = c("<High School", "High School/GED", "Some College", "College+"),
labels = c("<High School", "HS/GED", "Some College", "College+")),
Sleep = "Short (<7h)"
)
strat_long <- strat_df %>%
filter(grepl("Long", term)) %>%
mutate(
education = factor(education,
levels = c("<High School", "High School/GED", "Some College", "College+"),
labels = c("<High School", "HS/GED", "Some College", "College+")),
Sleep = "Long (>9h)"
)
strat_plot_data <- bind_rows(strat_short, strat_long) %>%
mutate(sig = ifelse(p.value < 0.05, "*", ""))
fig2 <- ggplot(strat_plot_data, aes(x = education, y = estimate,
color = Sleep, group = Sleep, shape = Sleep)) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray50") +
geom_line(aes(linetype = Sleep), linewidth = 0.8, alpha = 0.6) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
width = 0.1, linewidth = 0.8, position = position_dodge(0.1)) +
geom_point(size = 3.5, position = position_dodge(0.1)) +
geom_text(aes(label = sig, y = conf.high + 0.08), color = "black",
size = 5, fontface = "bold", show.legend = FALSE) +
scale_color_manual(values = c("Short (<7h)" = "#E74C3C", "Long (>9h)" = "#3498DB")) +
scale_shape_manual(values = c("Short (<7h)" = 16, "Long (>9h)" = 15)) +
scale_y_continuous(limits = c(0.3, 5), breaks = c(0.5, 1, 1.5, 2, 3, 4, 5)) +
labs(
title = "Effect Modification by Education: Adjusted ORs for Sleep Duration and Hypertension",
subtitle = "Stratified Analysis, NHANES 2021–2023 | * p<0.05",
x = "Educational Attainment (SES Proxy)",
y = "Adjusted Odds Ratio (95% CI)",
color = "Sleep Category",
shape = "Sleep Category",
linetype = "Sleep Category"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 0, hjust = 0.5)
)
ggsave("fig2_interaction_plot.pdf", fig2, width = 10, height = 6, dpi = 300)
ggsave("fig2_interaction_plot.png", fig2, width = 10, height = 6, dpi = 300)
prev_data <- nhanes_analytic%>%
group_by(sleep_cat, educ_cat) %>%
summarise(prev = mean(hypertension) * 100, n = n(), .groups = "drop") %>%
filter(!is.na(educ_cat))
## Warning: There were 12 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `prev = mean(hypertension) * 100`.
## ℹ In group 1: `sleep_cat = Normal (7-9h)`, `educ_cat = College+`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11 remaining warnings.
fig3 <- ggplot(prev_data, aes(x = sleep_cat, y = prev, fill = educ_cat)) +
geom_bar(stat = "identity", position = position_dodge(0.8), width = 0.7, alpha = 0.87) +
geom_text(aes(label = sprintf("%.0f%%", prev)),
position = position_dodge(0.8), vjust = -0.4, size = 3, fontface = "bold") +
scale_fill_manual(values = c("#D32F2F","#388E3C","#1565C0","#7B1FA2")) +
scale_y_continuous(labels = percent_format(scale = 1), limits = c(0, 90)) +
labs(
title = "Hypertension Prevalence by Sleep Duration and Educational Attainment",
subtitle = "NHANES 2021–2023 (N = 5,717)",
x = "Sleep Duration Category",
y = "Hypertension Prevalence (%)",
fill = "Education Level"
) +
theme_minimal(base_size = 12) +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
)
ggsave("fig3_prevalence_bars.pdf", fig3, width = 10, height = 6, dpi = 300)
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave("fig3_prevalence_bars.png", fig3, width = 10, height = 6, dpi = 300)
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Removed 12 rows containing missing values or values outside the scale range
## (`geom_text()`).
cat(“✓ Analysis complete. All figures and tables saved.”) cat(“Output files:”) cat(” - fig1_forest_plot.pdf/png“) cat(” - fig2_interaction_plot.pdf/png“) cat(” - fig3_prevalence_bars.pdf/png“)