R Markdown

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

plot(pressure)

Including Plots

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.5.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.5.2

## Warning: package 'tibble' was built under R version 4.5.2

## Warning: package 'tidyr' was built under R version 4.5.2

## Warning: package 'readr' was built under R version 4.5.2

## Warning: package 'purrr' was built under R version 4.5.2

## Warning: package 'stringr' was built under R version 4.5.2

## Warning: package 'forcats' was built under R version 4.5.2

## Warning: package 'lubridate' was built under R version 4.5.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.6
## ✔ ggplot2   4.0.2     ✔ stringr   1.6.0
## ✔ lubridate 1.9.4     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(haven)

## Warning: package 'haven' was built under R version 4.5.2

library(janitor)

## Warning: package 'janitor' was built under R version 4.5.2

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(knitr)

## Warning: package 'knitr' was built under R version 4.5.2

library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.5.2

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(plotly)

## Warning: package 'plotly' was built under R version 4.5.2

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(broom)
library(ggeffects)

## Warning: package 'ggeffects' was built under R version 4.5.2

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.5.2

library(GGally)

## Warning: package 'GGally' was built under R version 4.5.2

library(car)

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.5.2

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:dplyr':
## 
##     recode

library(lmtest)

## Warning: package 'lmtest' was built under R version 4.5.2

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.5.2

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.5.2

## corrplot 0.95 loaded

library(flextable)

## Warning: package 'flextable' was built under R version 4.5.3

## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary
## 
## The following objects are masked from 'package:plotly':
## 
##     highlight, style
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## The following object is masked from 'package:purrr':
## 
##     compose

library(officer)

## Warning: package 'officer' was built under R version 4.5.2

library(scales)

## Warning: package 'scales' was built under R version 4.5.2

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(ggplot2)
library(gtsummary)
library(gt)

## Warning: package 'gt' was built under R version 4.5.2

library(tableone)

## Warning: package 'tableone' was built under R version 4.5.2

library(survey)

## Warning: package 'survey' was built under R version 4.5.2

## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart

library(ResourceSelection)

## Warning: package 'ResourceSelection' was built under R version 4.5.3

## ResourceSelection 0.3-6   2023-06-27

Load the dataset

demo <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\DEMO_L.xpt") %>% clean_names()

bpx  <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BPXO_L.xpt") %>% clean_names()

hypertension <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BPQ_L.xpt") %>% clean_names()

sleep <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\SLQ_L.xpt") %>% clean_names()

bmi <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\BMX_L.xpt") %>% clean_names()

diabetes <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\DIQ_L.xpt") %>% clean_names()

smoking <- read_xpt("C:\\Users\\safwa\\OneDrive - University at Albany - SUNY\\EPI 553\\Final Paper\\Data\\SMQ_L.xpt") %>% clean_names()

View variable names

names(demo)

##  [1] "seqn"     "sddsrvyr" "ridstatr" "riagendr" "ridageyr" "ridagemn"
##  [7] "ridreth1" "ridreth3" "ridexmon" "ridexagm" "dmqmiliz" "dmdborn4"
## [13] "dmdyrusr" "dmdeduc2" "dmdmartz" "ridexprg" "dmdhhsiz" "dmdhrgnd"
## [19] "dmdhragz" "dmdhredz" "dmdhrmaz" "dmdhsedz" "wtint2yr" "wtmec2yr"
## [25] "sdmvstra" "sdmvpsu"  "indfmpir"

names(bpx)

##  [1] "seqn"     "bpaoarm"  "bpaocsz"  "bpxosy1"  "bpxodi1"  "bpxosy2" 
##  [7] "bpxodi2"  "bpxosy3"  "bpxodi3"  "bpxopls1" "bpxopls2" "bpxopls3"

names(hypertension)

## [1] "seqn"    "bpq020"  "bpq030"  "bpq150"  "bpq080"  "bpq101d"

names(sleep)

## [1] "seqn"   "slq300" "slq310" "sld012" "slq320" "slq330" "sld013"

names(bmi)

##  [1] "seqn"     "bmdstats" "bmxwt"    "bmiwt"    "bmxrecum" "bmirecum"
##  [7] "bmxhead"  "bmihead"  "bmxht"    "bmiht"    "bmxbmi"   "bmdbmic" 
## [13] "bmxleg"   "bmileg"   "bmxarml"  "bmiarml"  "bmxarmc"  "bmiarmc" 
## [19] "bmxwaist" "bmiwaist" "bmxhip"   "bmihip"

names(diabetes)

## [1] "seqn"    "diq010"  "did040"  "diq160"  "diq180"  "diq050"  "did060" 
## [8] "diq060u" "diq070"

names(smoking)

## [1] "seqn"     "smq020"   "smq040"   "smd641"   "smd650"   "smd100mn" "smq621"  
## [8] "smd630"   "smaquex2"

Merging Dataset

nhanes <- demo %>%
  left_join(bpx, by = "seqn") %>%
  left_join(hypertension, by = "seqn") %>%
  left_join(sleep, by = "seqn") %>%
  left_join(bmi, by = "seqn") %>%
  left_join(diabetes, by = "seqn") %>%
  left_join(smoking, by = "seqn")

Check for Missing Data

missing_summary <- data.frame(
  Variable = names(nhanes),
  Missing_Count = colSums(is.na(nhanes)),
  Missing_Percent = round(colSums(is.na(nhanes)) / nrow(nhanes) * 100, 2)
)

Show variables with the most missing data

print(missing_summary[order(-missing_summary$Missing_Count), ][1:15, ])

##          Variable Missing_Count Missing_Percent
## bmihead   bmihead         11933          100.00
## bmirecum bmirecum         11915           99.85
## smd630     smd630         11910           99.81
## bmxhead   bmxhead         11863           99.41
## bmiht       bmiht         11799           98.88
## bmiarml   bmiarml         11733           98.32
## bmiarmc   bmiarmc         11728           98.28
## smd641     smd641         11660           97.71
## diq060u   diq060u         11601           97.22
## did060     did060         11590           97.13
## bmiwt       bmiwt         11588           97.11
## bmiwaist bmiwaist         11586           97.09
## bmihip     bmihip         11572           96.97
## ridagemn ridagemn         11556           96.84
## bmileg     bmileg         11537           96.68

KEEP AEDULTS ONLY

nhanes <- nhanes %>%
  filter(ridageyr >= 18)

RECODE VARIABLES

nhanes <- nhanes %>%
mutate(
  # Mean BP from readings 2 & 3 (standard NHANES protocol)
  mean_sbp = rowMeans(cbind(bpxosy2, bpxosy3), na.rm = TRUE),
  mean_dbp = rowMeans(cbind(bpxodi2, bpxodi3), na.rm = TRUE),

  # Fall back to reading 1 if 2 & 3 missing
  mean_sbp = ifelse(is.nan(mean_sbp), bpxosy1, mean_sbp),
  mean_dbp = ifelse(is.nan(mean_dbp), bpxodi1, mean_dbp),

  # BP medication
  bp_meds = if_else(bpq150 == 1, 1L, 0L, missing = 0L),

    
hypertension = case_when(
  mean_sbp >= 130 ~ 1,
  mean_dbp >= 80  ~ 1,
  bpq150 == 1     ~ 1,
  mean_sbp < 130 & mean_dbp < 80 & (bpq150 == 2 | is.na(bpq150)) ~ 0,
  TRUE ~ NA_real_
  ),
  hypertension = factor(hypertension, levels = c(0,1), labels = c("No","Yes"))
)
# Check
table(nhanes$hypertension, useNA = "always")

## 
##   No  Yes <NA> 
## 2882 3824 1447

nhanes <- nhanes %>%
  mutate(
# Sleep Duration
  sld012 = ifelse(sld012 %in% c(77, 99), NA, sld012),
  sld013 = ifelse(sld013 %in% c(77, 99), NA, sld013),

  sleep_hrs = ((sld012 * 5) + (sld013 * 2)) / 7,

  sleep_cat = case_when(
    sleep_hrs < 7 ~ "Short (<7h)",
    sleep_hrs >= 7 & sleep_hrs <= 9 ~ "Normal (7-9h)",
    sleep_hrs > 9 ~ "Long (>9h)",
    TRUE ~ NA_character_
  ),

  sleep_cat = factor(
    sleep_cat,
    levels = c("Normal (7-9h)", "Short (<7h)", "Long (>9h)")
  ),

  # Education (SES Proxy)
  educ_cat = case_when(
    dmdeduc2 == 1 ~ "<High School",
    dmdeduc2 == 2 ~ "High School/GED",
    dmdeduc2 == 3 ~ "Some College",
    dmdeduc2 %in% c(4,5) ~ "College+",
    TRUE ~ NA_character_
  ),

  educ_cat = factor(
    educ_cat,
    levels = c("College+", "<High School", "High School/GED", "Some College")
  ),

  # Covariates
  age = ridageyr,

  sex = factor(
    if_else(riagendr == 1, "Male", "Female"),
    levels = c("Female", "Male")
  ),

  race_eth = case_when(
    ridreth3 == 3 ~ "Non-Hispanic White",
    ridreth3 == 4 ~ "Non-Hispanic Black",
    ridreth3 == 1 ~ "Mexican American",
    ridreth3 == 2 ~ "Other Hispanic",
    ridreth3 == 6 ~ "Non-Hispanic Asian",
    ridreth3 == 7 ~ "Other/Multiracial",
    TRUE ~ NA_character_
  ),

  race_eth = factor(
    race_eth,
    levels = c(
      "Non-Hispanic White",
      "Non-Hispanic Black",
      "Mexican American",
      "Other Hispanic",
      "Non-Hispanic Asian",
      "Other/Multiracial"
    )
  ),

  bmi = bmxbmi,

  smoke_cat = case_when(
    smq020 == 2 ~ "Never",
    smq020 == 1 & smq040 %in% c(1,2) ~ "Current",
    smq020 == 1 & (smq040 == 3 | is.na(smq040)) ~ "Former",
    TRUE ~ NA_character_
  ),

  smoke_cat = factor(
    smoke_cat,
    levels = c("Never", "Former", "Current"))
)

Diabetes

 nhanes <- nhanes %>%
  mutate(
    # Diabetes
    diabetes = case_when(
      diq010 == 1 ~ "Yes",
      diq010 == 2 ~ "No",
      TRUE ~ NA_character_
    ),
    diabetes = factor(diabetes, levels = c("No", "Yes"))
)
# Check
table(nhanes$diabetes, useNA = "always")

## 
##   No  Yes <NA> 
## 6803 1073  277

CREATE ANALYTIC SAMPLE (Complete Case)

analytic_vars <- c("seqn", "hypertension", "sleep_cat", "sleep_hrs", "educ_cat",
                   "age", "sex", "race_eth", "bmi", "smoke_cat", "diabetes",
                   "mean_sbp", "mean_dbp", "bp_meds", "wtmec2yr", "sdmvpsu", "sdmvstra")

nhanes_analytic <- nhanes %>%
  select(all_of(analytic_vars)) %>%
  filter(
    !is.na(hypertension), 
    !is.na(sleep_hrs), 
    !is.na(sleep_cat), 
    !is.na(educ_cat),
    !is.na(age), 
    !is.na(sex), 
    !is.na(race_eth),
    !is.na(bmi), 
    !is.na(smoke_cat), 
    !is.na(diabetes)
  )

cat("Analytic sample N =", nrow(nhanes_analytic), "\n")

## Analytic sample N = 5566

cat("Hypertension prevalence:", round(mean(nhanes_analytic$hypertension == "Yes") * 100, 1), "%\n")

## Hypertension prevalence: 54.6 %

nhanes_analytic$hypertension <- relevel(nhanes_analytic$hypertension, ref = "No")

SURVEY DESIGN

# NHANES 2021-2023 uses standard 2-year weights
svy_design <- svydesign(
  id      = ~sdmvpsu,
  strata  = ~sdmvstra,
  weights = ~wtmec2yr,
  data    = nhanes_analytic,
  nest    = TRUE
)

TABLE 1 — PARTICIPANT CHARACTERISTICS

tbl1_vars <- c("age", "sex", "race_eth", "educ_cat", "bmi",
               "smoke_cat", "diabetes", "hypertension", "mean_sbp", "mean_dbp")
cat_vars  <- c("sex", "race_eth", "educ_cat", "smoke_cat", "diabetes", "hypertension")

tbl1 <- CreateTableOne(
  vars     = tbl1_vars,
  strata   = "sleep_cat",
  data     = nhanes_analytic,
  factorVars = cat_vars,
  addOverall = TRUE,
  test     = TRUE
)
print(tbl1, showAllLevels = TRUE, formatOptions = list(big.mark = ","))

##                       Stratified by sleep_cat
##                        level              Overall        Normal (7-9h) 
##   n                                        5,566          3,635        
##   age (mean (SD))                          53.61 (17.15)  53.50 (17.13)
##   sex (%)              Female               3074 (55.2)    2003 (55.1) 
##                        Male                 2492 (44.8)    1632 (44.9) 
##   race_eth (%)         Non-Hispanic White   3332 (59.9)    2322 (63.9) 
##                        Non-Hispanic Black    663 (11.9)     340 ( 9.4) 
##                        Mexican American      365 ( 6.6)     226 ( 6.2) 
##                        Other Hispanic        550 ( 9.9)     335 ( 9.2) 
##                        Non-Hispanic Asian    296 ( 5.3)     202 ( 5.6) 
##                        Other/Multiracial     360 ( 6.5)     210 ( 5.8) 
##   educ_cat (%)         College+             3727 (67.0)    2600 (71.5) 
##                        <High School          250 ( 4.5)     123 ( 3.4) 
##                        High School/GED       424 ( 7.6)     222 ( 6.1) 
##                        Some College         1165 (20.9)     690 (19.0) 
##   bmi (mean (SD))                          29.71 (7.22)   29.36 (7.01) 
##   smoke_cat (%)        Never                3288 (59.1)    2249 (61.9) 
##                        Former               1457 (26.2)     947 (26.1) 
##                        Current               821 (14.8)     439 (12.1) 
##   diabetes (%)         No                   4747 (85.3)    3163 (87.0) 
##                        Yes                   819 (14.7)     472 (13.0) 
##   hypertension (%)     No                   2525 (45.4)    1732 (47.6) 
##                        Yes                  3041 (54.6)    1903 (52.4) 
##   mean_sbp (mean (SD))                    122.64 (18.03) 122.12 (17.46)
##   mean_dbp (mean (SD))                     74.48 (10.96)  74.15 (10.70)
##                       Stratified by sleep_cat
##                        Short (<7h)    Long (>9h)     p      test
##   n                     1,120            811                    
##   age (mean (SD))       53.76 (15.82)  53.90 (18.89)  0.795     
##   sex (%)                 576 (51.4)     495 (61.0)  <0.001     
##                           544 (48.6)     316 (39.0)             
##   race_eth (%)            583 (52.1)     427 (52.7)  <0.001     
##                           196 (17.5)     127 (15.7)             
##                            67 ( 6.0)      72 ( 8.9)             
##                           120 (10.7)      95 (11.7)             
##                            64 ( 5.7)      30 ( 3.7)             
##                            90 ( 8.0)      60 ( 7.4)             
##   educ_cat (%)            705 (62.9)     422 (52.0)  <0.001     
##                            55 ( 4.9)      72 ( 8.9)             
##                            98 ( 8.8)     104 (12.8)             
##                           262 (23.4)     213 (26.3)             
##   bmi (mean (SD))       30.65 (7.52)   30.00 (7.63)  <0.001     
##   smoke_cat (%)           581 (51.9)     458 (56.5)  <0.001     
##                           300 (26.8)     210 (25.9)             
##                           239 (21.3)     143 (17.6)             
##   diabetes (%)            932 (83.2)     652 (80.4)  <0.001     
##                           188 (16.8)     159 (19.6)             
##   hypertension (%)        465 (41.5)     328 (40.4)  <0.001     
##                           655 (58.5)     483 (59.6)             
##   mean_sbp (mean (SD)) 123.66 (18.30) 123.58 (20.02)  0.013     
##   mean_dbp (mean (SD))  75.70 (11.48)  74.27 (11.28) <0.001

# Table 1 with gtsummary
tbl1_pub <- nhanes_analytic %>%
  select(all_of(tbl1_vars), sleep_cat) %>%
  tbl_summary(
    by = sleep_cat,
    label = list(
      age       ~ "Age, years",
      sex       ~ "Sex",
      race_eth  ~ "Race/Ethnicity",
      educ_cat  ~ "Education Level",
      bmi       ~ "BMI (kg/m²)",
      smoke_cat ~ "Smoking Status",
      diabetes  ~ "Diabetes",
      hypertension     ~ "Hypertension",
      mean_sbp  ~ "Systolic BP (mmHg)",
      mean_dbp  ~ "Diastolic BP (mmHg)"
    ),
    statistic = list(
      all_continuous()  ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  add_overall() %>%
  add_p(test = list(all_continuous() ~ "aov", all_categorical() ~ "chisq.test")) %>%
  bold_p(t = 0.05) %>%
  modify_spanning_header(c("stat_1","stat_2","stat_3") ~ "**Sleep Duration Category**") %>%
  modify_caption("**Table 1.** Participant Characteristics by Sleep Duration (NHANES 2021-2023, N = {N})")

## The following warnings were returned during `modify_caption()`:
## ! For variable `age` (`sleep_cat`) and "statistic" and "p.value" statistics:
##   The test "aov" in `add_p(test)` was deprecated in gtsummary 2.0.0. ℹ The same
##   functionality is covered in "oneway.test" with argument `var.equal = TRUE`.
## ! For variable `bmi` (`sleep_cat`) and "statistic" and "p.value" statistics:
##   The test "aov" in `add_p(test)` was deprecated in gtsummary 2.0.0. ℹ The same
##   functionality is covered in "oneway.test" with argument `var.equal = TRUE`.
## ! For variable `mean_dbp` (`sleep_cat`) and "statistic" and "p.value"
##   statistics: The test "aov" in `add_p(test)` was deprecated in gtsummary
##   2.0.0. ℹ The same functionality is covered in "oneway.test" with argument
##   `var.equal = TRUE`.
## ! For variable `mean_sbp` (`sleep_cat`) and "statistic" and "p.value"
##   statistics: The test "aov" in `add_p(test)` was deprecated in gtsummary
##   2.0.0. ℹ The same functionality is covered in "oneway.test" with argument
##   `var.equal = TRUE`.

tbl1_pub

**Table 1.** Participant Characteristics by Sleep Duration (NHANES 2021-2023, N = 5566)
Characteristic	Overall N = 5,566¹	Sleep Duration Category			p-value²
Characteristic	Overall N = 5,566¹	Normal (7-9h) N = 3,635¹	Short (<7h) N = 1,120¹	Long (>9h) N = 811¹	p-value²
Age, years	53.6 (17.1)	53.5 (17.1)	53.8 (15.8)	53.9 (18.9)	0.8
Sex					<0.001
Female	3,074 (55%)	2,003 (55%)	576 (51%)	495 (61%)
Male	2,492 (45%)	1,632 (45%)	544 (49%)	316 (39%)
Race/Ethnicity					<0.001
Non-Hispanic White	3,332 (60%)	2,322 (64%)	583 (52%)	427 (53%)
Non-Hispanic Black	663 (12%)	340 (9.4%)	196 (18%)	127 (16%)
Mexican American	365 (6.6%)	226 (6.2%)	67 (6.0%)	72 (8.9%)
Other Hispanic	550 (9.9%)	335 (9.2%)	120 (11%)	95 (12%)
Non-Hispanic Asian	296 (5.3%)	202 (5.6%)	64 (5.7%)	30 (3.7%)
Other/Multiracial	360 (6.5%)	210 (5.8%)	90 (8.0%)	60 (7.4%)
Education Level					<0.001
College+	3,727 (67%)	2,600 (72%)	705 (63%)	422 (52%)
<High School	250 (4.5%)	123 (3.4%)	55 (4.9%)	72 (8.9%)
High School/GED	424 (7.6%)	222 (6.1%)	98 (8.8%)	104 (13%)
Some College	1,165 (21%)	690 (19%)	262 (23%)	213 (26%)
BMI (kg/m²)	29.7 (7.2)	29.4 (7.0)	30.7 (7.5)	30.0 (7.6)	<0.001
Smoking Status					<0.001
Never	3,288 (59%)	2,249 (62%)	581 (52%)	458 (56%)
Former	1,457 (26%)	947 (26%)	300 (27%)	210 (26%)
Current	821 (15%)	439 (12%)	239 (21%)	143 (18%)
Diabetes	819 (15%)	472 (13%)	188 (17%)	159 (20%)	<0.001
Hypertension	3,041 (55%)	1,903 (52%)	655 (58%)	483 (60%)	<0.001
Systolic BP (mmHg)	122.6 (18.0)	122.1 (17.5)	123.7 (18.3)	123.6 (20.0)	0.013
Diastolic BP (mmHg)	74.5 (11.0)	74.2 (10.7)	75.7 (11.5)	74.3 (11.3)	<0.001
¹ Mean (SD); n (%)
² One-way analysis of means; Pearson’s Chi-squared test

#Distribution of Systolic Blood Pressure (Outcome Variable)

ggplot(nhanes_analytic %>% filter(!is.na(mean_sbp)), 
       aes(x = mean_sbp)) + 
  geom_histogram(aes(y = after_stat(density)),      # ← updated notation
                 bins = 40, fill = '#2E5496', alpha = 0.7) + 
  geom_density(color = 'red', linewidth = 1) + 
  geom_vline(xintercept = 130, linetype = 'dashed', color = 'darkred') + 
  annotate("text", x = 133, y = 0.025,              # ← optional: label the line
           label = "130 mmHg\nthreshold", 
           color = "darkred", size = 3, hjust = 0) +
  labs(
    title = "Figure 1. Distribution of Systolic Blood Pressure",
    subtitle = "NHANES 2021–2023 (N = 5,661)",
    x = "Mean Systolic BP (mmHg)", 
    y = "Density"
  ) + 
  theme_minimal()

#Hypertension Prevalence by Sleep Duration

fig2_data <- nhanes_analytic %>%
  group_by(sleep_cat, hypertension) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(sleep_cat) %>%
  mutate(prop = n / sum(n)) %>%
  filter(as.character(hypertension) == "Yes")   # ← fix factor comparison

fig2 <- ggplot(fig2_data, aes(x = sleep_cat, y = prop, fill = sleep_cat)) +
  geom_col(width = 0.6, alpha = 0.85) +
  geom_text(aes(label = sprintf("%.1f%%", prop * 100)),
            vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, max(fig2_data$prop) * 1.15)
  ) +
  scale_fill_manual(values = c("#1565C0", "#D32F2F", "#388E3C")) +
  labs(
    title = "Figure 2. Hypertension Prevalence by Sleep Duration",
    subtitle = "NHANES 2021–2023",
    x = "Sleep Duration Category",
    y = "Proportion Hypertensive (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

print(fig2)

ggsave("fig2_hypertension_by_sleep.png", fig2, width = 7, height = 5, dpi = 300)

#Hypertension Prevalence by Education Level and Sleep Duration

nhanes_analytic %>%
  group_by(educ_cat, sleep_cat, hypertension) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(educ_cat, sleep_cat) %>%
  mutate(prop = n / sum(n)) %>%
  filter(as.character(hypertension) == "Yes") %>%       # ← fix factor comparison
  ggplot(aes(x = educ_cat, y = prop, fill = sleep_cat)) +
  geom_col(position = "dodge", alpha = 0.85) +
  scale_fill_manual(values = c(
    "Normal (7-9h)" = "#1565C0",
    "Short (<7h)"   = "#D32F2F",
    "Long (>9h)"    = "#388E3C"
  )) +
  scale_y_continuous(
    labels = scales::percent_format(),
    limits = c(0, 0.9)
  ) +
  geom_text(aes(label = scales::percent(prop, accuracy = 1)),
            position = position_dodge(0.9),
            vjust = -0.4, size = 3, fontface = "bold") +
  labs(
    title    = "Figure 3. Hypertension Prevalence by Education and Sleep Duration",
    subtitle = "NHANES 2021–2023",
    x        = "Education Level",
    y        = "Proportion Hypertensive (%)",
    fill     = "Sleep Duration"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x  = element_text(angle = 35, hjust = 1),
    legend.position  = "bottom",
    plot.title   = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

#Regression Analysis # Model 1: Crude

model_crude <- glm(
  hypertension~ sleep_cat,
  data   = nhanes_analytic,
  family = binomial(link = "logit")
)

#  Model 2: Adjusted
model_adj <- glm(
  hypertension ~ sleep_cat + educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
  data   = nhanes_analytic,
  family = binomial(link = "logit")
)

SECTION B: BIVARIATE (CRUDE) LOGISTIC REGRESSION

Model 0: Crude — sleep duration only

model_crude <- glm(
  hypertension ~ sleep_cat,
  data   = nhanes_analytic,
  family = binomial(link = "logit")
)

crude_tbl <- tidy(model_crude, exponentiate = TRUE, conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = dplyr::recode(term,
      "sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
      "sleep_catLong (>9h)"  = "Long sleep (>9h) vs. Normal"),
    OR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
    p_fmt = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  select(term, OR_CI, p_fmt)

cat("\n=== Crude Logistic Regression ===\n")

## 
## === Crude Logistic Regression ===

print(crude_tbl)

## # A tibble: 2 × 3
##   term                         OR_CI            p_fmt 
##   <chr>                        <chr>            <chr> 
## 1 Short sleep (<7h) vs. Normal 1.28 (1.12–1.47) <0.001
## 2 Long sleep (>9h) vs. Normal  1.34 (1.15–1.57) <0.001

Merged regression table

tbl_crude <- model_crude %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(sleep_cat ~ "Sleep Duration")
  ) %>%
  modify_header(estimate = "**Crude OR**")

tbl_adj <- model_adj %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      sleep_cat ~ "Sleep Duration",
      educ_cat  ~ "Education Level",
      age       ~ "Age (years)",
      sex       ~ "Sex",
      race_eth  ~ "Race/Ethnicity",
      bmi       ~ "BMI (kg/m²)",
      smoke_cat ~ "Smoking Status",
      diabetes  ~ "Diabetes"
    )
  ) %>%
  modify_header(estimate = "**Adjusted OR**")

tbl_merge(
  list(tbl_crude, tbl_adj),
  tab_spanner = c("**Crude Model**", "**Adjusted Model**")
) %>%
  modify_caption("**Table 2.** Logistic Regression: Sleep Duration and Hypertension Risk (NHANES 2021–2023)")

## The number rows in the tables to be merged do not match, which may result in
## rows appearing out of order.
## ℹ See `tbl_merge()` (`?gtsummary::tbl_merge()`) help file for details. Use
##   `quiet=TRUE` to silence message.

**Table 2.** Logistic Regression: Sleep Duration and Hypertension Risk (NHANES 2021–2023)
Characteristic	Crude Model			Adjusted Model
Characteristic	Crude OR	95% CI	p-value	Adjusted OR	95% CI	p-value
Sleep Duration
Normal (7-9h)	—	—		—	—
Short (<7h)	1.28	1.12, 1.47	<0.001	1.07	0.92, 1.25	0.4
Long (>9h)	1.34	1.15, 1.57	<0.001	1.22	1.02, 1.47	0.032
Education Level
College+				—	—
<High School				1.35	0.99, 1.87	0.063
High School/GED				1.34	1.05, 1.72	0.021
Some College				1.17	1.00, 1.37	0.046
Age (years)				1.05	1.05, 1.06	<0.001
Sex
Female				—	—
Male				1.48	1.30, 1.67	<0.001
Race/Ethnicity
Non-Hispanic White				—	—
Non-Hispanic Black				1.78	1.45, 2.18	<0.001
Mexican American				0.89	0.68, 1.15	0.4
Other Hispanic				0.89	0.72, 1.11	0.3
Non-Hispanic Asian				1.64	1.25, 2.17	<0.001
Other/Multiracial				1.12	0.87, 1.44	0.4
BMI (kg/m²)				1.09	1.08, 1.10	<0.001
Smoking Status
Never				—	—
Former				1.08	0.94, 1.25	0.3
Current				1.23	1.02, 1.47	0.028
Diabetes
No				—	—
Yes				1.67	1.37, 2.03	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

SECTION C: ADJUSTED LOGISTIC REGRESSION

model_adj <- glm(
  hypertension ~ sleep_cat + educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
  data   = nhanes_analytic,
  family = binomial(link = "logit")
)

adj_tbl <- tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = dplyr::recode(term,
      "sleep_catShort (<7h)"              = "Short sleep (<7h) vs. Normal",
      "sleep_catLong (>9h)"               = "Long sleep (>9h) vs. Normal",
      "educ_cat<High School"              = "Education: <High School vs. College+",
      "educ_catHigh School/GED"           = "Education: High School/GED vs. College+",
      "educ_catSome College"              = "Education: Some College vs. College+",
      "sexMale"                           = "Sex: Male vs. Female",
      "race_ethNon-Hispanic Black"        = "Race: Non-Hispanic Black vs. White",
      "race_ethMexican American"          = "Race: Mexican American vs. White",
      "race_ethOther Hispanic"            = "Race: Other Hispanic vs. White",
      "race_ethNon-Hispanic Asian"        = "Race: Non-Hispanic Asian vs. White",
      "race_ethOther/Multiracial"         = "Race: Other/Multiracial vs. White",
      "smoke_catFormer"                   = "Smoking: Former vs. Never",
      "smoke_catCurrent"                  = "Smoking: Current vs. Never"
    ),
    aOR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
    p_fmt  = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  ) %>%
  select(term, aOR_CI, p_fmt)

cat("\n=== Adjusted Logistic Regression ===\n")

## 
## === Adjusted Logistic Regression ===

print(adj_tbl, n = 30)

## # A tibble: 16 × 3
##    term                                    aOR_CI           p_fmt 
##    <chr>                                   <chr>            <chr> 
##  1 Short sleep (<7h) vs. Normal            1.07 (0.92–1.25) 0.376 
##  2 Long sleep (>9h) vs. Normal             1.22 (1.02–1.47) 0.032 
##  3 Education: <High School vs. College+    1.35 (0.99–1.87) 0.063 
##  4 Education: High School/GED vs. College+ 1.34 (1.05–1.72) 0.021 
##  5 Education: Some College vs. College+    1.17 (1.00–1.37) 0.046 
##  6 age                                     1.05 (1.05–1.06) <0.001
##  7 Sex: Male vs. Female                    1.48 (1.30–1.67) <0.001
##  8 Race: Non-Hispanic Black vs. White      1.78 (1.45–2.18) <0.001
##  9 Race: Mexican American vs. White        0.89 (0.68–1.15) 0.369 
## 10 Race: Other Hispanic vs. White          0.89 (0.72–1.11) 0.302 
## 11 Race: Non-Hispanic Asian vs. White      1.64 (1.25–2.17) <0.001
## 12 Race: Other/Multiracial vs. White       1.12 (0.87–1.44) 0.381 
## 13 bmi                                     1.09 (1.08–1.10) <0.001
## 14 Smoking: Former vs. Never               1.08 (0.94–1.25) 0.288 
## 15 Smoking: Current vs. Never              1.23 (1.02–1.47) 0.028 
## 16 diabetesYes                             1.67 (1.37–2.03) <0.001

SECTION D: INTERACTION MODEL (Sleep × Education)

model_int <- glm(
  hypertension ~ sleep_cat * educ_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
  data   = nhanes_analytic,
  family = binomial(link = "logit")
)

# Likelihood ratio test for interaction
lrt <- anova(model_adj, model_int, test = "LRT")
cat("\n=== Likelihood Ratio Test for Interaction ===\n")

## 
## === Likelihood Ratio Test for Interaction ===

print(lrt)

## Analysis of Deviance Table
## 
## Model 1: hypertension ~ sleep_cat + educ_cat + age + sex + race_eth + 
##     bmi + smoke_cat + diabetes
## Model 2: hypertension ~ sleep_cat * educ_cat + age + sex + race_eth + 
##     bmi + smoke_cat + diabetes
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      5549     6234.6                     
## 2      5543     6232.3  6    2.327   0.8873

cat("Interaction p-value:", lrt$"Pr(>Chi)"[2], "\n")

## Interaction p-value: 0.8872993

SECTION E: STRATIFIED ANALYSIS (if p_interaction < 0.10)

cat("\n=== Stratified Analysis by Education ===\n")

## 
## === Stratified Analysis by Education ===

educ_levels <- levels(nhanes_analytic$educ_cat)

strat_results <- lapply(educ_levels, function(edu) {
  sub <- nhanes_analytic %>% filter(educ_cat == edu)
  
  if (nrow(sub) < 50) return(NULL)
  
  mod <- glm(
    hypertension ~ sleep_cat + age + sex + race_eth + bmi + smoke_cat + diabetes,
    data = sub, family = binomial(link = "logit")
  )
  
  tidy(mod, exponentiate = TRUE, conf.int = TRUE) %>%
    filter(grepl("sleep_cat", term)) %>%
    mutate(
      education = edu,
      n = nrow(sub),
      term =dplyr:: recode(term,
        "sleep_catShort (<7h)" = "Short (<7h) vs. Normal",
        "sleep_catLong (>9h)"  = "Long (>9h) vs. Normal"),
      aOR_CI = sprintf("%.2f (%.2f–%.2f)", estimate, conf.low, conf.high),
      p_fmt  = ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
    ) %>%
    select(education, n, term, estimate, conf.low, conf.high, aOR_CI, p_fmt, p.value)
})

strat_df <- bind_rows(strat_results)
cat("\nStratified aORs:\n")

## 
## Stratified aORs:

print(strat_df %>% select(education, n, term, aOR_CI, p_fmt), n = 20)

## # A tibble: 8 × 5
##   education           n term                   aOR_CI           p_fmt
##   <chr>           <int> <chr>                  <chr>            <chr>
## 1 College+         3727 Short (<7h) vs. Normal 1.13 (0.93–1.37) 0.209
## 2 College+         3727 Long (>9h) vs. Normal  1.20 (0.94–1.52) 0.146
## 3 <High School      250 Short (<7h) vs. Normal 0.77 (0.35–1.67) 0.501
## 4 <High School      250 Long (>9h) vs. Normal  0.97 (0.45–2.10) 0.937
## 5 High School/GED   424 Short (<7h) vs. Normal 1.00 (0.56–1.82) 0.989
## 6 High School/GED   424 Long (>9h) vs. Normal  1.38 (0.78–2.51) 0.275
## 7 Some College     1165 Short (<7h) vs. Normal 1.01 (0.73–1.41) 0.952
## 8 Some College     1165 Long (>9h) vs. Normal  1.25 (0.86–1.82) 0.238

SECTION F: REGRESSION TABLES (gtsummary)

Combined crude + adjusted table

tbl_crude <- model_crude %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(sleep_cat ~ "Sleep Duration"),
  ) %>%
  modify_header(estimate = "**Crude OR**")

tbl_adj <- model_adj %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      sleep_cat ~ "Sleep Duration",
      educ_cat  ~ "Education Level",
      age       ~ "Age (years)",
      sex       ~ "Sex",
      race_eth  ~ "Race/Ethnicity",
      bmi       ~ "BMI (kg/m²)",
      smoke_cat ~ "Smoking Status",
      diabetes  ~ "Diabetes"
    )
  ) %>%
  modify_header(estimate = "**Adjusted OR**")

tbl_merge <- tbl_merge(
  list(tbl_crude, tbl_adj),
  tab_spanner = c("**Crude Model**", "**Adjusted Model**")
) %>%
  modify_caption("**Table 2.** Logistic Regression: Sleep Duration and Hypertension (NHANES 2021–2023)")

## The number rows in the tables to be merged do not match, which may result in
## rows appearing out of order.
## ℹ See `tbl_merge()` (`?gtsummary::tbl_merge()`) help file for details. Use
##   `quiet=TRUE` to silence message.

print(tbl_merge)

## <div id="wgdivzjzhr" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
##   <style>#wgdivzjzhr table {
##   font-family: system-ui, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
##   -webkit-font-smoothing: antialiased;
##   -moz-osx-font-smoothing: grayscale;
## }
## 
## #wgdivzjzhr thead, #wgdivzjzhr tbody, #wgdivzjzhr tfoot, #wgdivzjzhr tr, #wgdivzjzhr td, #wgdivzjzhr th {
##   border-style: none;
## }
## 
## #wgdivzjzhr p {
##   margin: 0;
##   padding: 0;
## }
## 
## #wgdivzjzhr .gt_table {
##   display: table;
##   border-collapse: collapse;
##   line-height: normal;
##   margin-left: auto;
##   margin-right: auto;
##   color: #333333;
##   font-size: 16px;
##   font-weight: normal;
##   font-style: normal;
##   background-color: #FFFFFF;
##   width: auto;
##   border-top-style: solid;
##   border-top-width: 2px;
##   border-top-color: #A8A8A8;
##   border-right-style: none;
##   border-right-width: 2px;
##   border-right-color: #D3D3D3;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #A8A8A8;
##   border-left-style: none;
##   border-left-width: 2px;
##   border-left-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_caption {
##   padding-top: 4px;
##   padding-bottom: 4px;
## }
## 
## #wgdivzjzhr .gt_title {
##   color: #333333;
##   font-size: 125%;
##   font-weight: initial;
##   padding-top: 4px;
##   padding-bottom: 4px;
##   padding-left: 5px;
##   padding-right: 5px;
##   border-bottom-color: #FFFFFF;
##   border-bottom-width: 0;
## }
## 
## #wgdivzjzhr .gt_subtitle {
##   color: #333333;
##   font-size: 85%;
##   font-weight: initial;
##   padding-top: 3px;
##   padding-bottom: 5px;
##   padding-left: 5px;
##   padding-right: 5px;
##   border-top-color: #FFFFFF;
##   border-top-width: 0;
## }
## 
## #wgdivzjzhr .gt_heading {
##   background-color: #FFFFFF;
##   text-align: center;
##   border-bottom-color: #FFFFFF;
##   border-left-style: none;
##   border-left-width: 1px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 1px;
##   border-right-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_bottom_border {
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_col_headings {
##   border-top-style: solid;
##   border-top-width: 2px;
##   border-top-color: #D3D3D3;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   border-left-style: none;
##   border-left-width: 1px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 1px;
##   border-right-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_col_heading {
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: normal;
##   text-transform: inherit;
##   border-left-style: none;
##   border-left-width: 1px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 1px;
##   border-right-color: #D3D3D3;
##   vertical-align: bottom;
##   padding-top: 5px;
##   padding-bottom: 6px;
##   padding-left: 5px;
##   padding-right: 5px;
##   overflow-x: hidden;
## }
## 
## #wgdivzjzhr .gt_column_spanner_outer {
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: normal;
##   text-transform: inherit;
##   padding-top: 0;
##   padding-bottom: 0;
##   padding-left: 4px;
##   padding-right: 4px;
## }
## 
## #wgdivzjzhr .gt_column_spanner_outer:first-child {
##   padding-left: 0;
## }
## 
## #wgdivzjzhr .gt_column_spanner_outer:last-child {
##   padding-right: 0;
## }
## 
## #wgdivzjzhr .gt_column_spanner {
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   vertical-align: bottom;
##   padding-top: 5px;
##   padding-bottom: 5px;
##   overflow-x: hidden;
##   display: inline-block;
##   width: 100%;
## }
## 
## #wgdivzjzhr .gt_spanner_row {
##   border-bottom-style: hidden;
## }
## 
## #wgdivzjzhr .gt_group_heading {
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: initial;
##   text-transform: inherit;
##   border-top-style: solid;
##   border-top-width: 2px;
##   border-top-color: #D3D3D3;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   border-left-style: none;
##   border-left-width: 1px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 1px;
##   border-right-color: #D3D3D3;
##   vertical-align: middle;
##   text-align: left;
## }
## 
## #wgdivzjzhr .gt_empty_group_heading {
##   padding: 0.5px;
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: initial;
##   border-top-style: solid;
##   border-top-width: 2px;
##   border-top-color: #D3D3D3;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   vertical-align: middle;
## }
## 
## #wgdivzjzhr .gt_from_md > :first-child {
##   margin-top: 0;
## }
## 
## #wgdivzjzhr .gt_from_md > :last-child {
##   margin-bottom: 0;
## }
## 
## #wgdivzjzhr .gt_row {
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
##   margin: 10px;
##   border-top-style: solid;
##   border-top-width: 1px;
##   border-top-color: #D3D3D3;
##   border-left-style: none;
##   border-left-width: 1px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 1px;
##   border-right-color: #D3D3D3;
##   vertical-align: middle;
##   overflow-x: hidden;
## }
## 
## #wgdivzjzhr .gt_stub {
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: initial;
##   text-transform: inherit;
##   border-right-style: solid;
##   border-right-width: 2px;
##   border-right-color: #D3D3D3;
##   padding-left: 5px;
##   padding-right: 5px;
## }
## 
## #wgdivzjzhr .gt_stub_row_group {
##   color: #333333;
##   background-color: #FFFFFF;
##   font-size: 100%;
##   font-weight: initial;
##   text-transform: inherit;
##   border-right-style: solid;
##   border-right-width: 2px;
##   border-right-color: #D3D3D3;
##   padding-left: 5px;
##   padding-right: 5px;
##   vertical-align: top;
## }
## 
## #wgdivzjzhr .gt_row_group_first td {
##   border-top-width: 2px;
## }
## 
## #wgdivzjzhr .gt_row_group_first th {
##   border-top-width: 2px;
## }
## 
## #wgdivzjzhr .gt_summary_row {
##   color: #333333;
##   background-color: #FFFFFF;
##   text-transform: inherit;
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
## }
## 
## #wgdivzjzhr .gt_first_summary_row {
##   border-top-style: solid;
##   border-top-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_first_summary_row.thick {
##   border-top-width: 2px;
## }
## 
## #wgdivzjzhr .gt_last_summary_row {
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_grand_summary_row {
##   color: #333333;
##   background-color: #FFFFFF;
##   text-transform: inherit;
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
## }
## 
## #wgdivzjzhr .gt_first_grand_summary_row {
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
##   border-top-style: double;
##   border-top-width: 6px;
##   border-top-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_last_grand_summary_row_top {
##   padding-top: 8px;
##   padding-bottom: 8px;
##   padding-left: 5px;
##   padding-right: 5px;
##   border-bottom-style: double;
##   border-bottom-width: 6px;
##   border-bottom-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_striped {
##   background-color: rgba(128, 128, 128, 0.05);
## }
## 
## #wgdivzjzhr .gt_table_body {
##   border-top-style: solid;
##   border-top-width: 2px;
##   border-top-color: #D3D3D3;
##   border-bottom-style: solid;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_footnotes {
##   color: #333333;
##   background-color: #FFFFFF;
##   border-bottom-style: none;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   border-left-style: none;
##   border-left-width: 2px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 2px;
##   border-right-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_footnote {
##   margin: 0px;
##   font-size: 90%;
##   padding-top: 4px;
##   padding-bottom: 4px;
##   padding-left: 5px;
##   padding-right: 5px;
## }
## 
## #wgdivzjzhr .gt_sourcenotes {
##   color: #333333;
##   background-color: #FFFFFF;
##   border-bottom-style: none;
##   border-bottom-width: 2px;
##   border-bottom-color: #D3D3D3;
##   border-left-style: none;
##   border-left-width: 2px;
##   border-left-color: #D3D3D3;
##   border-right-style: none;
##   border-right-width: 2px;
##   border-right-color: #D3D3D3;
## }
## 
## #wgdivzjzhr .gt_sourcenote {
##   font-size: 90%;
##   padding-top: 4px;
##   padding-bottom: 4px;
##   padding-left: 5px;
##   padding-right: 5px;
## }
## 
## #wgdivzjzhr .gt_left {
##   text-align: left;
## }
## 
## #wgdivzjzhr .gt_center {
##   text-align: center;
## }
## 
## #wgdivzjzhr .gt_right {
##   text-align: right;
##   font-variant-numeric: tabular-nums;
## }
## 
## #wgdivzjzhr .gt_font_normal {
##   font-weight: normal;
## }
## 
## #wgdivzjzhr .gt_font_bold {
##   font-weight: bold;
## }
## 
## #wgdivzjzhr .gt_font_italic {
##   font-style: italic;
## }
## 
## #wgdivzjzhr .gt_super {
##   font-size: 65%;
## }
## 
## #wgdivzjzhr .gt_footnote_marks {
##   font-size: 75%;
##   vertical-align: 0.4em;
##   position: initial;
## }
## 
## #wgdivzjzhr .gt_asterisk {
##   font-size: 100%;
##   vertical-align: 0;
## }
## 
## #wgdivzjzhr .gt_indent_1 {
##   text-indent: 5px;
## }
## 
## #wgdivzjzhr .gt_indent_2 {
##   text-indent: 10px;
## }
## 
## #wgdivzjzhr .gt_indent_3 {
##   text-indent: 15px;
## }
## 
## #wgdivzjzhr .gt_indent_4 {
##   text-indent: 20px;
## }
## 
## #wgdivzjzhr .gt_indent_5 {
##   text-indent: 25px;
## }
## 
## #wgdivzjzhr .katex-display {
##   display: inline-flex !important;
##   margin-bottom: 0.75em !important;
## }
## 
## #wgdivzjzhr div.Reactable > div.rt-table > div.rt-thead > div.rt-tr.rt-tr-group-header > div.rt-th-group:after {
##   height: 0px !important;
## }
## </style>
##   <table class="gt_table" data-quarto-disable-processing="false" data-quarto-bootstrap="false">
##   <caption><span class='gt_from_md'><strong>Table 2.</strong> Logistic Regression: Sleep Duration and Hypertension (NHANES 2021–2023)</span></caption>
##   <thead>
##     <tr class="gt_col_headings gt_spanner_row">
##       <th class="gt_col_heading gt_columns_bottom_border gt_left" rowspan="2" colspan="1" scope="col" id="label"><span class='gt_from_md'><strong>Characteristic</strong></span></th>
##       <th class="gt_center gt_columns_top_border gt_column_spanner_outer" rowspan="1" colspan="3" scope="colgroup" id="level 1; estimate_1">
##         <div class="gt_column_spanner"><span class='gt_from_md'><strong>Crude Model</strong></span></div>
##       </th>
##       <th class="gt_center gt_columns_top_border gt_column_spanner_outer" rowspan="1" colspan="3" scope="colgroup" id="level 1; estimate_2">
##         <div class="gt_column_spanner"><span class='gt_from_md'><strong>Adjusted Model</strong></span></div>
##       </th>
##     </tr>
##     <tr class="gt_col_headings">
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="estimate_1"><span class='gt_from_md'><strong>Crude OR</strong></span></th>
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="conf.low_1"><span class='gt_from_md'><strong>95% CI</strong></span></th>
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="p.value_1"><span class='gt_from_md'><strong>p-value</strong></span></th>
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="estimate_2"><span class='gt_from_md'><strong>Adjusted OR</strong></span></th>
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="conf.low_2"><span class='gt_from_md'><strong>95% CI</strong></span></th>
##       <th class="gt_col_heading gt_columns_bottom_border gt_center" rowspan="1" colspan="1" scope="col" id="p.value_2"><span class='gt_from_md'><strong>p-value</strong></span></th>
##     </tr>
##   </thead>
##   <tbody class="gt_table_body">
##     <tr><td headers="label" class="gt_row gt_left">Sleep Duration</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Normal (7-9h)</td>
## <td headers="estimate_1" class="gt_row gt_center">—</td>
## <td headers="conf.low_1" class="gt_row gt_center">—</td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Short (&lt;7h)</td>
## <td headers="estimate_1" class="gt_row gt_center">1.28</td>
## <td headers="conf.low_1" class="gt_row gt_center">1.12, 1.47</td>
## <td headers="p.value_1" class="gt_row gt_center"><0.001</td>
## <td headers="estimate_2" class="gt_row gt_center">1.07</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.92, 1.25</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Long (&gt;9h)</td>
## <td headers="estimate_1" class="gt_row gt_center">1.34</td>
## <td headers="conf.low_1" class="gt_row gt_center">1.15, 1.57</td>
## <td headers="p.value_1" class="gt_row gt_center"><0.001</td>
## <td headers="estimate_2" class="gt_row gt_center">1.22</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.02, 1.47</td>
## <td headers="p.value_2" class="gt_row gt_center">0.032</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Education Level</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    College+</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    &lt;High School</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.35</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.99, 1.87</td>
## <td headers="p.value_2" class="gt_row gt_center">0.063</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    High School/GED</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.34</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.05, 1.72</td>
## <td headers="p.value_2" class="gt_row gt_center">0.021</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Some College</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.17</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.00, 1.37</td>
## <td headers="p.value_2" class="gt_row gt_center">0.046</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Age (years)</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.05</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.05, 1.06</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Sex</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Female</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Male</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.48</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.30, 1.67</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Race/Ethnicity</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Non-Hispanic White</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Non-Hispanic Black</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.78</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.45, 2.18</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Mexican American</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">0.89</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.68, 1.15</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Other Hispanic</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">0.89</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.72, 1.11</td>
## <td headers="p.value_2" class="gt_row gt_center">0.3</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Non-Hispanic Asian</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.64</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.25, 2.17</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Other/Multiracial</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.12</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.87, 1.44</td>
## <td headers="p.value_2" class="gt_row gt_center">0.4</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">BMI (kg/m²)</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.09</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.08, 1.10</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Smoking Status</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Never</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Former</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.08</td>
## <td headers="conf.low_2" class="gt_row gt_center">0.94, 1.25</td>
## <td headers="p.value_2" class="gt_row gt_center">0.3</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Current</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.23</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.02, 1.47</td>
## <td headers="p.value_2" class="gt_row gt_center">0.028</td></tr>
##     <tr><td headers="label" class="gt_row gt_left">Diabetes</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_2" class="gt_row gt_center"><br /></td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    No</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">—</td>
## <td headers="conf.low_2" class="gt_row gt_center">—</td>
## <td headers="p.value_2" class="gt_row gt_center"><br /></td></tr>
##     <tr><td headers="label" class="gt_row gt_left">    Yes</td>
## <td headers="estimate_1" class="gt_row gt_center"><br /></td>
## <td headers="conf.low_1" class="gt_row gt_center"><br /></td>
## <td headers="p.value_1" class="gt_row gt_center"><br /></td>
## <td headers="estimate_2" class="gt_row gt_center">1.67</td>
## <td headers="conf.low_2" class="gt_row gt_center">1.37, 2.03</td>
## <td headers="p.value_2" class="gt_row gt_center"><0.001</td></tr>
##   </tbody>
##   <tfoot>
##     <tr class="gt_sourcenotes">
##       <td class="gt_sourcenote" colspan="7"><span class='gt_from_md'>Abbreviations: CI = Confidence Interval, OR = Odds Ratio</span></td>
##     </tr>
##   </tfoot>
## </table>
## </div>

Section 4: Model Diagnostics

Extract fitted values and residuals

fitted_vals   <- fitted(model_adj)
dev_resids    <- residuals(model_adj, type = "deviance")
pearson_resids <- residuals(model_adj, type = "pearson")
std_dev_resids <- rstandard(model_adj)
leverage_vals <- hatvalues(model_adj)
cooks_d       <- cooks.distance(model_adj)
n             <- nrow(nhanes_analytic)
p             <- length(coef(model_adj))

diag_df <- data.frame(
  fitted      = fitted_vals,
  dev_resid   = dev_resids,
  pearson_res = pearson_resids,
  std_devresid = std_dev_resids,
  leverage    = leverage_vals,
  cooks       = cooks_d,
  index       = 1:n
)

Diagnostic Plot 1: Deviance Residuals vs. Fitted Values

ggplot(diag_df, aes(x = fitted, y = dev_resid)) +
  geom_point(alpha = 0.25, size = 0.8, color = "#555555") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  geom_smooth(method = "loess", se = FALSE, color = "red", linewidth = 0.9) +
  labs(
    title = "Figure D1. Deviance Residuals vs. Fitted Values",
    x     = "Fitted Probability",
    y     = "Deviance Residual"
  ) +
  theme_minimal(base_size = 11)

## `geom_smooth()` using formula = 'y ~ x'

Figure D1. Deviance residuals plotted against fitted (predicted) probabilities from the adjusted logistic regression model. The red smoother line indicates the local average.

#Diagnostic Plot 2: Calibration — Predicted vs. Observed Probabilities by Decile # Hosmer-Lemeshow test

hl_test <- hoslem.test(
  x = as.integer(nhanes_analytic$hypertension == "Yes"),
  y = fitted_vals,
  g = 10
)
hl_p <- round(hl_test$p.value, 3)

# Calibration data 
calib_df <- data.frame(
  obs   = as.integer(nhanes_analytic$hypertension == "Yes"),
  pred  = fitted_vals
) %>%
  mutate(decile = ntile(pred, 10)) %>%
  group_by(decile) %>%
  summarise(
    mean_pred = mean(pred),
    obs_prop  = mean(obs),
    n         = n(),
    .groups   = "drop"
  )

ggplot(calib_df, aes(x = mean_pred, y = obs_prop)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray50") +
  geom_point(size = 3.5, color = "#1565C0") +
  geom_line(color = "#1565C0", alpha = 0.6) +
  scale_x_continuous(labels = percent_format(), limits = c(0, 1)) +
  scale_y_continuous(labels = percent_format(), limits = c(0, 1)) +
  labs(
    title    = "Figure D2. Calibration Plot (Predicted vs. Observed by Decile)",
    subtitle = paste0("Hosmer-Lemeshow test p = ", hl_p),
    x        = "Mean Predicted Probability",
    y        = "Observed Proportion Hypertensive"
  ) +
  theme_minimal(base_size = 11)

Figure D2. Calibration plot showing mean predicted probability versus observed hypertension proportion within each decile of predicted probability. Points close to the diagonal line indicate good calibration.

#Diagnostic Plot 3: Cook’s Distance — Influential Observations

threshold <- 4 / n

ggplot(diag_df, aes(x = index, y = cooks)) +
  geom_point(aes(color = cooks > threshold), size = 0.7, alpha = 0.5) +
  geom_hline(yintercept = threshold, linetype = "dashed", color = "red") +
  scale_color_manual(values = c("FALSE" = "#888888", "TRUE" = "#D32F2F"),
                     labels = c("Non-influential", paste0("Cook's D > 4/n (", round(threshold, 5), ")")),
                     name   = "") +
  annotate("text", x = n * 0.85, y = threshold * 1.3,
           label = paste0("Threshold = 4/n = ", round(threshold, 5)),
           size = 3, color = "red") +
  labs(
    title = "Figure D3. Cook's Distance — Influential Observations",
    x     = "Observation Index",
    y     = "Cook's Distance"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")

Figure D3. Cook’s distance for each observation in the analytic sample. Observations above the conventional threshold of 4/n are flagged as potentially influential.

n_influential <- sum(cooks_d > threshold)

Diagnostic Plot 4: Leverage vs. Standardized Deviance Residuals

leverage_threshold <- 2 * p / n

ggplot(diag_df, aes(x = leverage, y = std_devresid)) +
  geom_point(alpha = 0.25, size = 0.7, color = "#555555") +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", color = "#E91E63", alpha = 0.7) +
  geom_vline(xintercept = leverage_threshold,
             linetype = "dashed", color = "#1565C0", alpha = 0.7) +
  geom_smooth(method = "loess", se = FALSE, color = "red",
              linewidth = 0.8, alpha = 0.7) +
  annotate("text", x = leverage_threshold + 0.001, y = 3.5,
           label = "High leverage\nthreshold", color = "#1565C0",
           size = 3, hjust = 0) +
  labs(
    title = "Figure D4. Leverage vs. Standardized Deviance Residuals",
    x     = "Leverage (Hat Value)",
    y     = "Standardized Deviance Residual"
  ) +
  theme_minimal(base_size = 11)

## `geom_smooth()` using formula = 'y ~ x'

Figure D4. Standardized deviance residuals plotted against leverage (hat values). High leverage combined with large residuals identifies the most influential observations.

Section 5: Regression Visualizations

Figure 1: Predicted Probabilities of Hypertension by Sleep Duration

# Predicted probabilities from adjusted model using ggeffects
pred_sleep <- ggpredict(model_adj, terms = "sleep_cat")

ggplot(pred_sleep, aes(x = x, y = predicted)) +
  geom_col(aes(fill = x), width = 0.55, alpha = 0.85) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.12, linewidth = 0.9) +
  geom_text(aes(label = sprintf("%.1f%%", predicted * 100)),
            vjust = -0.6, size = 3.8, fontface = "bold") +
  scale_fill_manual(values = c(
    "Normal (7-9h)" = "#1565C0",
    "Short (<7h)"   = "#D32F2F",
    "Long (>9h)"    = "#388E3C"
  )) +
  scale_y_continuous(
    labels = percent_format(accuracy = 1),
    limits = c(0, max(pred_sleep$conf.high) * 1.2)
  ) +
  labs(
    title    = "Figure 1. Adjusted Predicted Probability of Hypertension by Sleep Duration",
    subtitle = "Adjusted for age, sex, race/ethnicity, BMI, education, smoking, and diabetes\n(NHANES 2021–2023)",
    x        = "Sleep Duration Category",
    y        = "Predicted Probability of Hypertension"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position  = "none",
    plot.title       = element_text(face = "bold", size = 11),
    panel.grid.minor = element_blank()
  )

Figure 1. Adjusted predicted probabilities of hypertension by sleep duration category, holding all covariates at their mean (continuous) or reference (categorical) values. Estimates are derived from the adjusted logistic regression model. Error bars represent 95% confidence intervals.

Figure 2: Full Model Coefficient Plot (Forest Plot)

#Extract all coefficients from adjusted model

coef_df <- tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term_label = dplyr::recode(term,
      "sleep_catShort (<7h)"          = "Sleep: Short (<7h) vs. Normal",
      "sleep_catLong (>9h)"           = "Sleep: Long (>9h) vs. Normal",
      "educ_cat<High School"          = "Education: <High School vs. College+",
      "educ_catHigh School/GED"       = "Education: HS/GED vs. College+",
      "educ_catSome College"          = "Education: Some College vs. College+",
      "age"                           = "Age (per year)",
      "sexMale"                       = "Sex: Male vs. Female",
      "race_ethNon-Hispanic Black"    = "Race: Non-Hispanic Black vs. White",
      "race_ethMexican American"      = "Race: Mexican American vs. White",
      "race_ethOther Hispanic"        = "Race: Other Hispanic vs. White",
      "race_ethNon-Hispanic Asian"    = "Race: Non-Hispanic Asian vs. White",
      "race_ethOther/Multiracial"     = "Race: Other/Multiracial vs. White",
      "bmi"                           = "BMI (per kg/m²)",
      "smoke_catFormer"               = "Smoking: Former vs. Never",
      "smoke_catCurrent"              = "Smoking: Current vs. Never",
      "diabetesYes"                   = "Diabetes: Yes vs. No"
    ),
    # Group for color coding
    group = case_when(
      grepl("^Sleep",    term_label) ~ "Primary Exposure",
      grepl("^Education",term_label) ~ "SES/Education",
      grepl("^Age|^BMI", term_label) ~ "Continuous Covariates",
      TRUE ~ "Other Covariates"
    ),
    group = factor(group, levels = c("Primary Exposure", "SES/Education",
                                     "Continuous Covariates", "Other Covariates")),
    sig   = ifelse(p.value < 0.05, "Significant (p<0.05)", "Non-significant"),
    # Reorder: primary exposure at top
    term_label = fct_reorder(term_label, estimate)
  )

ggplot(coef_df, aes(x = estimate, y = term_label, color = group, shape = sig)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "gray50", linewidth = 0.7) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
                 height = 0.25, linewidth = 0.75) +
  geom_point(size = 3) +
  scale_color_manual(
    values = c(
      "Primary Exposure"     = "#D32F2F",
      "SES/Education"        = "#7B1FA2",
      "Continuous Covariates"= "#1565C0",
      "Other Covariates"     = "#388E3C"
    ),
    name = "Predictor Group"
  ) +
  scale_shape_manual(
    values = c("Significant (p<0.05)" = 16, "Non-significant" = 1),
    name   = "Statistical Significance"
  ) +
  scale_x_log10(breaks = c(0.5, 0.75, 1.0, 1.25, 1.5, 2.0, 3.0, 5.0)) +
  labs(
    title    = "Forest Plot — Adjusted Odds Ratios",
    subtitle = "All predictors from the adjusted logistic regression model",
    x        = "Adjusted Odds Ratio (95% CI) — log scale",
    y        = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position  = "bottom",
    legend.box       = "vertical",
    plot.title       = element_text(face = "bold", size = 12),
    panel.grid.minor = element_blank(),
    axis.text.y      = element_text(size = 10)
  )

## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `height` was translated to `width`.

Figure 2. Adjusted odds ratios (aOR) and 95% confidence intervals for all predictors in the adjusted logistic regression model. The vertical dashed line at OR = 1.0 represents the null. Estimates to the right of the null indicate increased odds of hypertension; estimates to the left indicate decreased odds.

SECTION G: FIGURES

Figure 1: Forest Plot (Crude & Adjusted ORs)

forest_data <- bind_rows(
  crude_tbl %>% mutate(Model = "Crude"),
  adj_tbl   %>% filter(grepl("sleep", term, ignore.case = TRUE)) %>% mutate(Model = "Adjusted")
) %>%
  bind_rows(
    tidy(model_crude, exponentiate = TRUE, conf.int = TRUE) %>%
      filter(term != "(Intercept)") %>%
      mutate(term = dplyr::recode(term,
        "sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
        "sleep_catLong (>9h)"  = "Long sleep (>9h) vs. Normal"),
        Model = "Crude"),
    tidy(model_adj, exponentiate = TRUE, conf.int = TRUE) %>%
      filter(grepl("sleep_cat", term)) %>%
      mutate(term = dplyr:: recode(term,
        "sleep_catShort (<7h)" = "Short sleep (<7h) vs. Normal",
        "sleep_catLong (>9h)"  = "Long sleep (>9h) vs. Normal"),
        Model = "Adjusted")
  ) %>%
  select(term, estimate, conf.low, conf.high, Model) %>%
  distinct()

fig1 <- ggplot(forest_data, aes(x = estimate, y = term, color = Model, shape = Model)) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
                 height = 0.2, linewidth = 0.8, position = position_dodge(0.4)) +
  geom_point(size = 3.5, position = position_dodge(0.4)) +
  scale_color_manual(values = c("Crude" = "#2196F3", "Adjusted" = "#E91E63")) +
  scale_shape_manual(values = c("Crude" = 16, "Adjusted" = 17)) +
  scale_x_continuous(breaks = seq(0.5, 2.5, 0.25), limits = c(0.5, 2.8)) +
  labs(
    title = "Association Between Sleep Duration and Hypertension",
    subtitle = "NHANES 2021–2023 (N = 5,717)",
    x = "Odds Ratio (95% CI)",
    y = NULL,
    color = "Model", shape = "Model"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

ggsave("fig1_forest_plot.pdf", fig1, width = 9, height = 5, dpi = 300)

## `height` was translated to `width`.

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fig1_forest_plot.png", fig1, width = 9, height = 5, dpi = 300)

## `height` was translated to `width`.

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

Figure 2: Interaction Plot (Stratified ORs by Education)

strat_short <- strat_df %>%
  filter(grepl("Short", term)) %>%
  mutate(
    education = factor(education,
      levels = c("<High School", "High School/GED", "Some College", "College+"),
      labels = c("<High School", "HS/GED", "Some College", "College+")),
    Sleep = "Short (<7h)"
  )
strat_long <- strat_df %>%
  filter(grepl("Long", term)) %>%
  mutate(
    education = factor(education,
      levels = c("<High School", "High School/GED", "Some College", "College+"),
      labels = c("<High School", "HS/GED", "Some College", "College+")),
    Sleep = "Long (>9h)"
  )

strat_plot_data <- bind_rows(strat_short, strat_long) %>%
  mutate(sig = ifelse(p.value < 0.05, "*", ""))

fig2 <- ggplot(strat_plot_data, aes(x = education, y = estimate,
                                    color = Sleep, group = Sleep, shape = Sleep)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray50") +
  geom_line(aes(linetype = Sleep), linewidth = 0.8, alpha = 0.6) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.1, linewidth = 0.8, position = position_dodge(0.1)) +
  geom_point(size = 3.5, position = position_dodge(0.1)) +
  geom_text(aes(label = sig, y = conf.high + 0.08), color = "black",
            size = 5, fontface = "bold", show.legend = FALSE) +
  scale_color_manual(values = c("Short (<7h)" = "#E74C3C", "Long (>9h)" = "#3498DB")) +
  scale_shape_manual(values = c("Short (<7h)" = 16, "Long (>9h)" = 15)) +
  scale_y_continuous(limits = c(0.3, 5), breaks = c(0.5, 1, 1.5, 2, 3, 4, 5)) +
  labs(
    title = "Effect Modification by Education: Adjusted ORs for Sleep Duration and Hypertension",
    subtitle = "Stratified Analysis, NHANES 2021–2023 | * p<0.05",
    x = "Educational Attainment (SES Proxy)",
    y = "Adjusted Odds Ratio (95% CI)",
    color = "Sleep Category",
    shape = "Sleep Category",
    linetype = "Sleep Category"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 0, hjust = 0.5)
  )

ggsave("fig2_interaction_plot.pdf", fig2, width = 10, height = 6, dpi = 300)                                                                                                                                 
ggsave("fig2_interaction_plot.png", fig2, width = 10, height = 6, dpi = 300)

Figure 3: Grouped Bar Chart — Hypertension Prevalence

prev_data <- nhanes_analytic%>%
  group_by(sleep_cat, educ_cat) %>%
  summarise(prev = mean(hypertension) * 100, n = n(), .groups = "drop") %>%
  filter(!is.na(educ_cat))

## Warning: There were 12 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `prev = mean(hypertension) * 100`.
## ℹ In group 1: `sleep_cat = Normal (7-9h)`, `educ_cat = College+`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11 remaining warnings.

fig3 <- ggplot(prev_data, aes(x = sleep_cat, y = prev, fill = educ_cat)) +
  geom_bar(stat = "identity", position = position_dodge(0.8), width = 0.7, alpha = 0.87) +
  geom_text(aes(label = sprintf("%.0f%%", prev)),
            position = position_dodge(0.8), vjust = -0.4, size = 3, fontface = "bold") +
  scale_fill_manual(values = c("#D32F2F","#388E3C","#1565C0","#7B1FA2")) +
  scale_y_continuous(labels = percent_format(scale = 1), limits = c(0, 90)) +
  labs(
    title = "Hypertension Prevalence by Sleep Duration and Educational Attainment",
    subtitle = "NHANES 2021–2023 (N = 5,717)",
    x = "Sleep Duration Category",
    y = "Hypertension Prevalence (%)",
    fill = "Education Level"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

ggsave("fig3_prevalence_bars.pdf", fig3, width = 10, height = 6, dpi = 300)

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("fig3_prevalence_bars.png", fig3, width = 10, height = 6, dpi = 300)

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Removed 12 rows containing missing values or values outside the scale range
## (`geom_text()`).

cat(“✓ Analysis complete. All figures and tables saved.”) cat(“Output files:”) cat(” - fig1_forest_plot.pdf/png“) cat(” - fig2_interaction_plot.pdf/png“) cat(” - fig3_prevalence_bars.pdf/png“)

Progress Checking2

Ummat Safwat Sristy

2026-04-08

R Markdown

Including Plots

Including Plots

Load the dataset

View variable names

Merging Dataset

Check for Missing Data

Show variables with the most missing data

KEEP AEDULTS ONLY

RECODE VARIABLES

Diabetes

CREATE ANALYTIC SAMPLE (Complete Case)

SURVEY DESIGN

TABLE 1 — PARTICIPANT CHARACTERISTICS

SECTION B: BIVARIATE (CRUDE) LOGISTIC REGRESSION

Model 0: Crude — sleep duration only

Merged regression table

SECTION C: ADJUSTED LOGISTIC REGRESSION

SECTION D: INTERACTION MODEL (Sleep × Education)

SECTION E: STRATIFIED ANALYSIS (if p_interaction < 0.10)

SECTION F: REGRESSION TABLES (gtsummary)

Combined crude + adjusted table

Section 4: Model Diagnostics

Extract fitted values and residuals

Diagnostic Plot 1: Deviance Residuals vs. Fitted Values

Diagnostic Plot 4: Leverage vs. Standardized Deviance Residuals

Section 5: Regression Visualizations

Figure 1: Predicted Probabilities of Hypertension by Sleep Duration

Figure 2: Full Model Coefficient Plot (Forest Plot)

SECTION G: FIGURES

Figure 1: Forest Plot (Crude & Adjusted ORs)

Figure 2: Interaction Plot (Stratified ORs by Education)

Figure 3: Grouped Bar Chart — Hypertension Prevalence