1. Compute the mean, median, standard deviation, and IQR for each of the numeric variables used in the MetS criteria in:
a. the overall sample and
b. separately for male and female participants. (10 points)

Code for setup of crude analysis:

diastolic <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(dia, na.rm = T), 
            Median = median(dia, na.rm = T),
            SD = sd(dia, na.rm = T),
            Q1 = quantile(dia, na.rm = T, probs = 0.25),
            Q3 = quantile(dia, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Diastolic") %>%
  select(Var, n, Mean, Median, SD, IQR)

systolic <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(sys, na.rm = T), 
            Median = median(sys, na.rm = T),
            SD = sd(sys, na.rm = T),
            Q1 = quantile(sys, na.rm = T, probs = 0.25),
            Q3 = quantile(sys, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Systolic") %>%
  select(Var, n, Mean, Median, SD, IQR)

glucose <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(glucose_lab, na.rm = T), 
            Median = median(glucose_lab, na.rm = T),
            SD = sd(glucose_lab, na.rm = T),
            Q1 = quantile(glucose_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(glucose_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Resting Glucose") %>%
  select(Var, n, Mean, Median, SD, IQR)

hdl <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(hdl_lab, na.rm = T), 
            Median = median(hdl_lab, na.rm = T),
            SD = sd(hdl_lab, na.rm = T),
            Q1 = quantile(hdl_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(hdl_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "HDL") %>%
  select(Var, n, Mean, Median, SD, IQR)

triglycerides <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(trig_lab, na.rm = T), 
            Median = median(trig_lab, na.rm = T),
            SD = sd(trig_lab, na.rm = T),
            Q1 = quantile(trig_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(trig_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Triglycerides") %>%
  select(Var, n, Mean, Median, SD, IQR)

waist <- mets_sleep %>%
  ungroup %>%
  summarise(n = n(),
            Mean = mean(waist_size, na.rm = T), 
            Median = median(waist_size, na.rm = T),
            SD = sd(waist_size, na.rm = T),
            Q1 = quantile(waist_size, na.rm = T, probs = 0.25),
            Q3 = quantile(waist_size, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Waist Size") %>%
  select(Var, n, Mean, Median, SD, IQR)


Crude Descriptive Statistics

mets_descriptive_crude %>%
  kbl(col.names = c("Var", "n", "Mean", "Median", "SD", "IQR"),
        caption = "MetS Components - Crude Descriptive Stats",
        digits = c(0, 0, 2, 2, 2, 2)) %>%
  kable_classic_2(font_size = 18,
                html_font = "Cambria",
                full_width = T)
MetS Components - Crude Descriptive Stats
Var n Mean Median SD IQR
Diastolic 2419 72.62 72.62 12.44 13.33
Systolic 2419 126.99 124.67 20.24 24.00
Resting Glucose 2419 114.30 105.00 36.61 18.00
HDL 2419 53.62 51.00 15.12 18.00
Triglycerides 2419 114.04 98.00 102.06 69.00
Waist Size 2419 100.26 99.03 16.51 21.40



Code for setup of stratified analysis.

diastolic_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(dia, na.rm = T), 
            Median = median(dia, na.rm = T),
            SD = sd(dia, na.rm = T),
            Q1 = quantile(dia, na.rm = T, probs = 0.25),
            Q3 = quantile(dia, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Diastolic") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)

systolic_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(sys, na.rm = T), 
            Median = median(sys, na.rm = T),
            SD = sd(sys, na.rm = T),
            Q1 = quantile(sys, na.rm = T, probs = 0.25),
            Q3 = quantile(sys, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Systolic") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)

glucose_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(glucose_lab, na.rm = T), 
            Median = median(glucose_lab, na.rm = T),
            SD = sd(glucose_lab, na.rm = T),
            Q1 = quantile(glucose_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(glucose_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Resting Glucose") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)

hdl_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(hdl_lab, na.rm = T), 
            Median = median(hdl_lab, na.rm = T),
            SD = sd(hdl_lab, na.rm = T),
            Q1 = quantile(hdl_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(hdl_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "HDL") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)

triglycerides_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(trig_lab, na.rm = T), 
            Median = median(trig_lab, na.rm = T),
            SD = sd(trig_lab, na.rm = T),
            Q1 = quantile(trig_lab, na.rm = T, probs = 0.25),
            Q3 = quantile(trig_lab, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Triglycerides") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)

waist_s <- mets_sleep %>%
  group_by(sex) %>%
  summarise(n = n(),
            Mean = mean(waist_size, na.rm = T), 
            Median = median(waist_size, na.rm = T),
            SD = sd(waist_size, na.rm = T),
            Q1 = quantile(waist_size, na.rm = T, probs = 0.25),
            Q3 = quantile(waist_size, na.rm = T, probs = 0.75),
            IQR = Q3-Q1) %>%
  mutate(Var = "Waist Size") %>%
  select(Var, sex, n, Mean, Median, SD, IQR)


Sex-Stratified Descriptive Statistics

#print(mets_descriptive_strat)

mets_descriptive_strat %>%
  kbl(col.names = c("Var", "Sex", "n", "Mean", "Median", "SD", "IQR"),
      caption = "MetS Components - Stratified Descriptive Stats",
      digits = c(0, 0, 0, 2, 2, 2, 2)) %>%
  kable_classic_2(font_size = 18,
                html_font = "Cambria",
                full_width = T) 
MetS Components - Stratified Descriptive Stats
Var Sex n Mean Median SD IQR
Systolic Male 1179 127.85 126.00 18.44 20.67
Systolic Female 1240 126.17 123.33 21.79 26.67
Diastolic Male 1179 74.38 74.00 11.98 13.33
Diastolic Female 1240 70.95 72.00 12.65 12.67
Resting Glucose Male 1179 116.46 106.00 37.70 17.00
Resting Glucose Female 1240 112.25 103.00 35.44 18.55
HDL Male 1179 48.91 48.00 12.85 14.00
HDL Female 1240 58.10 57.00 15.74 18.00
Triglycerides Male 1179 124.69 102.00 128.26 71.00
Triglycerides Female 1240 103.92 91.50 66.91 64.25
Waist Size Male 1179 101.55 101.40 15.85 19.95
Waist Size Female 1240 99.03 98.55 17.03 22.35



2. For each of the five criteria used to ascertain MetS, compute the proportion of study subjects who have met each criterion (e.g., what proportion have elevated waist circumference?) in the overall sample. Note that this may require an additional data processing step to create the relevant binary variables to indicate whether each criterion has been met. (5 points)


large_Waist <- mets_comp2 %>%
  filter(!is.na(waist_large)) %>%
  group_by(waist_large) %>%
  summarise(n=n()) %>%
  mutate(Var = "Large Waist Circumference", 
         value = waist_large,
         P=n/sum(n),
         Of=sum(n),
         Perc = P*100) %>%
  select(Var, value, n, Of, Perc)
  
trig_elev <- mets_comp2 %>%
  filter(!is.na(elev_trigl)) %>%
  group_by(elev_trigl) %>%
  summarise(n=n()) %>%
  mutate(Var = "Elevated Triglycerides",
         value = elev_trigl,
         P=n/sum(n),
         Of=sum(n),
         Perc = P*100) %>%
  select(Var, value, n, Of, Perc) 

hdl_low <- mets_comp2 %>%
  filter(!is.na(low_hdl)) %>%
  group_by(low_hdl) %>%
  summarise(n=n()) %>%
  mutate(Var = "Low HDL", 
         value = low_hdl,
         P=n/sum(n),
         Of=sum(n),
         Perc = P*100) %>%
  select(Var, value, n, Of, Perc) 

gluc_elev <- mets_comp2 %>%
  filter(!is.na(elev_gluc)) %>%
  group_by(elev_gluc) %>%
  summarise(n=n()) %>%
  mutate(Var = "Elevated Fasting Glucose", 
         value = elev_gluc,
         P=n/sum(n),
         Of=sum(n),
         Perc = P*100) %>%
  select(Var, value, n, Of, Perc)

bp_elev <- mets_comp2 %>%
  filter(!is.na(elev_bp)) %>%
  group_by(elev_bp) %>%
  summarise(n=n()) %>%
  mutate(Var = "Elevated Blood Pressure", 
         value = elev_bp,
         P=n/sum(n),
         Of=sum(n),
         Perc = P*100) %>%
  select(Var, value, n, Of, Perc)

MetS Components - Proportions

binarydf <- mets_binary %>%
  filter(value == 1) %>%
  select(Var, n, Of, Perc)

binarydf %>%
  kbl(col.names = c("Var", "n (Yes)", "Total", "Proportion (%)"),
      caption = "MetS Components - Proportions",
      digits = c(0, 0, 0, 2)) %>%
  kable_classic_2(font_size = 18,
                  html_font = "Cambria",
                  full_width = T) 
MetS Components - Proportions
Var n (Yes) Total Proportion (%)
Large Waist Circumference 1411 2419 58.33
Elevated Triglycerides 453 2419 18.73
Low HDL 642 2419 26.54
Elevated Fasting Glucose 1615 2419 66.76
Elevated Blood Pressure 916 2419 37.87



3. What is the mean sleep duration of participants who had not met any of the criteria from the previous question, met exactly 1 of the criteria, exactly 2, 3, 4 or 5? Hint: R can interpret logical indicators (TRUE or FALSE) as numeric values, where FALSE=0 and TRUE=1, when mathematical operations (e.g., addition/subtraction) are applied to them. (5 points)

Mean Sleep Hours by number of MetS criteria met

mets_sleep_q3 <- mets_sleep %>%
  select(sleep, waist_large, elev_trigl, low_hdl, elev_gluc, elev_bp, MetS) %>%
  mutate(mets_score = sum(waist_large, elev_trigl, low_hdl, elev_gluc, elev_bp))
  
mets_sleep_q3 %>%
  filter(!is.na(mets_score)) %>%
  group_by(mets_score) %>%
  summarise(mean_sleep = mean(sleep),
            sd = sd(sleep),
            count = n()) %>%
  select(mets_score, count, mean_sleep, sd) %>%
  kbl(col.names = c("# MetS Criteria Met", "n", "Mean daily sleep hours", "SD"),
      caption = "Mean Sleep Hours by number of MetS criteria met",
      digits = c(0, 0, 4, 4),
      align= "cc",
      booktabs=TRUE) %>%
  kable_classic_2(font_size = 18,
                  html_font = "Cambria",
                  full_width = T)
Mean Sleep Hours by number of MetS criteria met
# MetS Criteria Met n Mean daily sleep hours SD
0 289 7.6038 1.3660
1 541 7.3725 1.5149
2 684 7.4306 1.4535
3 556 7.4110 1.5739
4 285 7.4474 1.4574
5 64 7.3984 1.5512

4. Repeat the analysis in the previous question but instead compute the proportion of participants who met at least 1, at least 2, at least 3, and at least 4 criteria. (5 points)

mets_sleep_q4 <- mets_sleep_q3 %>%
  mutate(mets_1 = ifelse(mets_score >= 1, 1, 0),
         mets_2 = ifelse(mets_score >= 2, 1, 0),
         mets_3 = ifelse(mets_score >= 3, 1, 0),
         mets_4 = ifelse(mets_score >= 4, 1, 0))

mets_one <- mets_sleep_q4 %>%
  filter(mets_1 == 1) %>%
  ungroup() %>%
  summarize(mean_sleep = mean(sleep),
            sd = sd(sleep),
            count = n()) %>%
  mutate(mets_score2 = "1 or more") %>%
  select(mets_score2, count, mean_sleep, sd)

mets_two <- mets_sleep_q4 %>%
  filter(mets_2 == 1) %>%
  ungroup() %>%
  summarize(mean_sleep = mean(sleep),
            sd = sd(sleep),
            count = n()) %>%
  mutate(mets_score2 = "2 or more") %>%
  select(mets_score2, count, mean_sleep, sd)

mets_three <- mets_sleep_q4 %>%
  filter(mets_3 == 1) %>%
  ungroup() %>%
  summarize(mean_sleep = mean(sleep),
            sd = sd(sleep),
            count = n()) %>%
  mutate(mets_score2 = "3 or more") %>%
  select(mets_score2, count, mean_sleep, sd)

mets_four <- mets_sleep_q4 %>%
  filter(mets_4 == 1) %>%
  ungroup() %>%
  summarize(mean_sleep = mean(sleep),
            sd = sd(sleep),
            count = n()) %>%
  mutate(mets_score2 = "4 or more") %>%
  select(mets_score2, count, mean_sleep, sd)

Mean sleep hours by levels of MetS criteria observed


mets_atleast %>%
  kbl(col.names = c("# MetS Criteria Met", "n", "Mean daily sleep hours", "SD"),
      digits = c(0, 0, 4, 4),
      align= "cccc",
      booktabs=TRUE) %>%
  kable_classic_2(font_size = 18,
                  html_font = "Cambria",
                  full_width = T)
# MetS Criteria Met n Mean daily sleep hours SD
1 or more 2130 7.4120 1.5036
2 or more 1589 7.4254 1.5000
3 or more 905 7.4215 1.5350
4 or more 349 7.4384 1.4729
5. Suppose that the MetS definition used (i.e., at least 3 criteria met) is considered the gold standard, i.e., it depicts the actual MetS status of participants. However, you decided to try out several alternative definitions of metabolic syndrome defined below. For each alternative definition, compute its sensitivity and specificity compared to the gold standard. (10 points)

  1. Definition 1: MetS is positive if the participant met Criterion #1 (elevated waist circumference).
  2. Definition 2: MetS is positive if the participant met either Criterion #1 or Criterion #3 (HDL cholesterol).
  3. Definition 3: MetS is positive if the participant met both Criterion #1 and Criterion #3.

sens <- mets_sleep_q3 %>%
  select(MetS, waist_large, low_hdl) %>%
  mutate(tp1 = ifelse(waist_large == 1 & MetS == "MetS+", 1, 0),
         fn1 = ifelse(waist_large == 0 & MetS == "MetS+", 1, 0),
         fp1 = ifelse(waist_large == 1 & MetS == "MetS-", 1, 0),
         tn1 = ifelse(waist_large == 0 & MetS == "MetS-", 1, 0),
         
         tp2 = ifelse((waist_large == 1 | low_hdl == 1) & MetS == "MetS+", 1, 0),
         fn2 = ifelse((waist_large == 0 & low_hdl == 0) & MetS == "MetS+", 1, 0),
         fp2 = ifelse((waist_large == 1 | low_hdl == 1) & MetS == "MetS-", 1, 0),
         tn2 = ifelse((waist_large == 0 & low_hdl == 0) & MetS == "MetS-", 1, 0),
         
         tp3 = ifelse((waist_large == 1 & low_hdl == 1) & MetS == "MetS+", 1, 0),
         fn3 = ifelse((waist_large == 0 | low_hdl == 0) & MetS == "MetS+", 1, 0),
         fp3 = ifelse((waist_large == 1 & low_hdl == 1) & MetS == "MetS-", 1, 0),
         tn3 = ifelse((waist_large == 0 | low_hdl == 0) & MetS == "MetS-", 1, 0))

tp1_sum = sum(sens$tp1, na.rm=TRUE)
fn1_sum = sum(sens$fn1, na.rm=TRUE)
fp1_sum = sum(sens$fp1, na.rm=TRUE)
tn1_sum = sum(sens$tn1, na.rm=TRUE)
sen1 = tp1_sum / (tp1_sum + fn1_sum)
spec1 = tn1_sum / (tn1_sum + fp1_sum)

tp2_sum = sum(sens$tp2, na.rm=TRUE)
fn2_sum = sum(sens$fn2, na.rm=TRUE)
fp2_sum = sum(sens$fp2, na.rm=TRUE)
tn2_sum = sum(sens$tn2, na.rm=TRUE)
sen2 = tp2_sum / (tp2_sum + fn2_sum)
spec2 = tn2_sum / (tn2_sum + fp2_sum)

tp3_sum = sum(sens$tp3, na.rm=TRUE)
fn3_sum = sum(sens$fn3, na.rm=TRUE)
fp3_sum = sum(sens$fp3, na.rm=TRUE)
tn3_sum = sum(sens$tn3, na.rm=TRUE)
sen3 = tp3_sum / (tp3_sum + fn3_sum)
spec3 = tn3_sum / (tn3_sum + fp3_sum)

Standard = meets the definition of 3 of the criteria

Sensitivity and Specificity of each alternate test compared to the standard:

Method 1: MetS+ based on elevated waste size as only criteria

Sensitivity = 89.17%
Specificity = 60.11%

Method 2: MetS+ based on either elevated waste size OR low HDL

Sensitivity = 96.91%
Specificity = 54.95%

Method 3: MetS+ based on both elevated waste size AND low HDL

Sensitivity = 47.85%
Specificity = 95.97%


6. The packages table1 and TableOne contain functions to create tables like Table 1 in the Smiley paper. In general, a “Table 1” (which may not necessary be the first table) numerically describes the data used for analysis. Use either package to create a Table 1 similar to that in the Smiley paper. However, you may omit p-values for this exercise and ignore the Sitting (minutes/day) variable. (10 points)

labels <- list(
    variables=list(sex="Sex, n(%)",
                   race = "Race/Ethnicity, n(%)",
                   marital = "Marital Status, n(%)",
                   education = "Education, n(%)",
                   dia = "Diastolic Blood Pressure (mmHg)",
                   sys = "Systolic Blood Pressure (mmHg)",
                   sit_time = "Sitting (Minutes/Day)",
                   sleep = "Sleep Duration (Hours)",
                   glucose_lab = "Fasting Blood Glucose (mg/dL)",
                   hdl_lab = "High Density Lipoprotein (mg/dL)",
                   age="Age (Years)",
                   waist_size = "Waist Circumference (cm)"),
    groups=list("", "Metabolic Syndrome"))

# Remove MetS from the name of the levels of the groups
levels(mets_sleep$MetS) <- c("Yes", "No")

caption <- "Participants - NHANES per Metabolic Syndrome"

strata <- c(list(Total=mets_sleep), split(mets_sleep, mets_sleep$MetS))

my.render.cont <- function(x) {
    with(stats.apply.rounding(stats.default(x), digits=4, digits.pct=2), c("",
        "Mean (SD)"=sprintf("%s (%s)", MEAN, SD)))
}
my.render.cat <- function(x) {
    c("", sapply(stats.default(x), function(y) with(y,
        sprintf("%d (%0.2f%%)", FREQ, PCT))))
}

table1(strata, labels, groupspan = c(1, 2), 
       caption=caption, 
       #footnote=footnote,
       render.continuous=my.render.cont, render.categorical=my.render.cat)
Participants - NHANES per Metabolic Syndrome
Metabolic Syndrome
Total
(N=2419)
Yes
(N=905)
No
(N=1514)
Sex, n(%)
Male 1179 (48.74%) 404 (44.64%) 775 (51.19%)
Female 1240 (51.26%) 501 (55.36%) 739 (48.81%)
Race/Ethnicity, n(%)
Hispanic 561 (23.19%) 255 (28.18%) 306 (20.21%)
White 800 (33.07%) 311 (34.36%) 489 (32.30%)
Black 578 (23.89%) 179 (19.78%) 399 (26.35%)
Asian 350 (14.47%) 104 (11.49%) 246 (16.25%)
Multiracial (Other) 130 (5.37%) 56 (6.19%) 74 (4.89%)
Marital Status, n(%)
Married or Living with partner 1418 (58.62%) 530 (58.56%) 888 (58.65%)
Widowed 181 (7.48%) 87 (9.61%) 94 (6.21%)
Divorced or Separated 387 (16.00%) 179 (19.78%) 208 (13.74%)
Never Married 433 (17.90%) 109 (12.04%) 324 (21.40%)
Education, n(%)
< 9th grade 199 (8.23%) 101 (11.16%) 98 (6.47%)
9th-11th grade 291 (12.03%) 132 (14.59%) 159 (10.50%)
High School 569 (23.52%) 226 (24.97%) 343 (22.66%)
Some College 783 (32.37%) 283 (31.27%) 500 (33.03%)
>= College Graduate 576 (23.81%) 163 (18.01%) 413 (27.28%)
Missing 1 (0.0%) 0 (0%) 1 (0.1%)
Diastolic Blood Pressure (mmHg)
Mean (SD) 72.62 (12.44) 75.51 (13.53) 70.90 (11.41)
Systolic Blood Pressure (mmHg)
Mean (SD) 127.0 (20.24) 136.4 (20.68) 121.3 (17.71)
Sitting (Minutes/Day)
Mean (SD) 366.0 (621.6) 402.1 (809.5) 344.4 (473.7)
Missing 3 (0.1%) 0 (0%) 3 (0.2%)
Sleep Duration (Hours)
Mean (SD) 7.435 (1.489) 7.422 (1.535) 7.443 (1.461)
Fasting Blood Glucose (mg/dL)
Mean (SD) 114.3 (36.61) 129.2 (46.42) 105.4 (25.32)
High Density Lipoprotein (mg/dL)
Mean (SD) 53.62 (15.12) 46.30 (12.16) 57.99 (15.03)
Age (Years)
Mean (SD) 51.46 (17.56) 56.30 (15.57) 48.58 (18.05)
Waist Circumference (cm)
Mean (SD) 100.3 (16.51) 109.7 (14.85) 94.59 (14.76)