1. Compute the mean, median, standard deviation,
and IQR for each of the numeric variables used in the MetS criteria in:
a. the overall sample and
b. separately for male and female
participants. (10 points)
Code for setup of crude analysis:
diastolic <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(dia, na.rm = T),
Median = median(dia, na.rm = T),
SD = sd(dia, na.rm = T),
Q1 = quantile(dia, na.rm = T, probs = 0.25),
Q3 = quantile(dia, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Diastolic") %>%
select(Var, n, Mean, Median, SD, IQR)
systolic <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(sys, na.rm = T),
Median = median(sys, na.rm = T),
SD = sd(sys, na.rm = T),
Q1 = quantile(sys, na.rm = T, probs = 0.25),
Q3 = quantile(sys, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Systolic") %>%
select(Var, n, Mean, Median, SD, IQR)
glucose <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(glucose_lab, na.rm = T),
Median = median(glucose_lab, na.rm = T),
SD = sd(glucose_lab, na.rm = T),
Q1 = quantile(glucose_lab, na.rm = T, probs = 0.25),
Q3 = quantile(glucose_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Resting Glucose") %>%
select(Var, n, Mean, Median, SD, IQR)
hdl <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(hdl_lab, na.rm = T),
Median = median(hdl_lab, na.rm = T),
SD = sd(hdl_lab, na.rm = T),
Q1 = quantile(hdl_lab, na.rm = T, probs = 0.25),
Q3 = quantile(hdl_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "HDL") %>%
select(Var, n, Mean, Median, SD, IQR)
triglycerides <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(trig_lab, na.rm = T),
Median = median(trig_lab, na.rm = T),
SD = sd(trig_lab, na.rm = T),
Q1 = quantile(trig_lab, na.rm = T, probs = 0.25),
Q3 = quantile(trig_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Triglycerides") %>%
select(Var, n, Mean, Median, SD, IQR)
waist <- mets_sleep %>%
ungroup %>%
summarise(n = n(),
Mean = mean(waist_size, na.rm = T),
Median = median(waist_size, na.rm = T),
SD = sd(waist_size, na.rm = T),
Q1 = quantile(waist_size, na.rm = T, probs = 0.25),
Q3 = quantile(waist_size, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Waist Size") %>%
select(Var, n, Mean, Median, SD, IQR)
mets_descriptive_crude %>%
kbl(col.names = c("Var", "n", "Mean", "Median", "SD", "IQR"),
caption = "MetS Components - Crude Descriptive Stats",
digits = c(0, 0, 2, 2, 2, 2)) %>%
kable_classic_2(font_size = 18,
html_font = "Cambria",
full_width = T)
| Var | n | Mean | Median | SD | IQR |
|---|---|---|---|---|---|
| Diastolic | 2419 | 72.62 | 72.62 | 12.44 | 13.33 |
| Systolic | 2419 | 126.99 | 124.67 | 20.24 | 24.00 |
| Resting Glucose | 2419 | 114.30 | 105.00 | 36.61 | 18.00 |
| HDL | 2419 | 53.62 | 51.00 | 15.12 | 18.00 |
| Triglycerides | 2419 | 114.04 | 98.00 | 102.06 | 69.00 |
| Waist Size | 2419 | 100.26 | 99.03 | 16.51 | 21.40 |
Code for setup of stratified analysis.
diastolic_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(dia, na.rm = T),
Median = median(dia, na.rm = T),
SD = sd(dia, na.rm = T),
Q1 = quantile(dia, na.rm = T, probs = 0.25),
Q3 = quantile(dia, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Diastolic") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
systolic_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(sys, na.rm = T),
Median = median(sys, na.rm = T),
SD = sd(sys, na.rm = T),
Q1 = quantile(sys, na.rm = T, probs = 0.25),
Q3 = quantile(sys, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Systolic") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
glucose_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(glucose_lab, na.rm = T),
Median = median(glucose_lab, na.rm = T),
SD = sd(glucose_lab, na.rm = T),
Q1 = quantile(glucose_lab, na.rm = T, probs = 0.25),
Q3 = quantile(glucose_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Resting Glucose") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
hdl_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(hdl_lab, na.rm = T),
Median = median(hdl_lab, na.rm = T),
SD = sd(hdl_lab, na.rm = T),
Q1 = quantile(hdl_lab, na.rm = T, probs = 0.25),
Q3 = quantile(hdl_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "HDL") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
triglycerides_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(trig_lab, na.rm = T),
Median = median(trig_lab, na.rm = T),
SD = sd(trig_lab, na.rm = T),
Q1 = quantile(trig_lab, na.rm = T, probs = 0.25),
Q3 = quantile(trig_lab, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Triglycerides") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
waist_s <- mets_sleep %>%
group_by(sex) %>%
summarise(n = n(),
Mean = mean(waist_size, na.rm = T),
Median = median(waist_size, na.rm = T),
SD = sd(waist_size, na.rm = T),
Q1 = quantile(waist_size, na.rm = T, probs = 0.25),
Q3 = quantile(waist_size, na.rm = T, probs = 0.75),
IQR = Q3-Q1) %>%
mutate(Var = "Waist Size") %>%
select(Var, sex, n, Mean, Median, SD, IQR)
#print(mets_descriptive_strat)
mets_descriptive_strat %>%
kbl(col.names = c("Var", "Sex", "n", "Mean", "Median", "SD", "IQR"),
caption = "MetS Components - Stratified Descriptive Stats",
digits = c(0, 0, 0, 2, 2, 2, 2)) %>%
kable_classic_2(font_size = 18,
html_font = "Cambria",
full_width = T)
| Var | Sex | n | Mean | Median | SD | IQR |
|---|---|---|---|---|---|---|
| Systolic | Male | 1179 | 127.85 | 126.00 | 18.44 | 20.67 |
| Systolic | Female | 1240 | 126.17 | 123.33 | 21.79 | 26.67 |
| Diastolic | Male | 1179 | 74.38 | 74.00 | 11.98 | 13.33 |
| Diastolic | Female | 1240 | 70.95 | 72.00 | 12.65 | 12.67 |
| Resting Glucose | Male | 1179 | 116.46 | 106.00 | 37.70 | 17.00 |
| Resting Glucose | Female | 1240 | 112.25 | 103.00 | 35.44 | 18.55 |
| HDL | Male | 1179 | 48.91 | 48.00 | 12.85 | 14.00 |
| HDL | Female | 1240 | 58.10 | 57.00 | 15.74 | 18.00 |
| Triglycerides | Male | 1179 | 124.69 | 102.00 | 128.26 | 71.00 |
| Triglycerides | Female | 1240 | 103.92 | 91.50 | 66.91 | 64.25 |
| Waist Size | Male | 1179 | 101.55 | 101.40 | 15.85 | 19.95 |
| Waist Size | Female | 1240 | 99.03 | 98.55 | 17.03 | 22.35 |
2. For each of the five criteria used to ascertain
MetS, compute the proportion of study subjects who have met each
criterion (e.g., what proportion have elevated waist circumference?) in
the overall sample. Note that this may require an additional data
processing step to create the relevant binary variables to indicate
whether each criterion has been met. (5 points)
large_Waist <- mets_comp2 %>%
filter(!is.na(waist_large)) %>%
group_by(waist_large) %>%
summarise(n=n()) %>%
mutate(Var = "Large Waist Circumference",
value = waist_large,
P=n/sum(n),
Of=sum(n),
Perc = P*100) %>%
select(Var, value, n, Of, Perc)
trig_elev <- mets_comp2 %>%
filter(!is.na(elev_trigl)) %>%
group_by(elev_trigl) %>%
summarise(n=n()) %>%
mutate(Var = "Elevated Triglycerides",
value = elev_trigl,
P=n/sum(n),
Of=sum(n),
Perc = P*100) %>%
select(Var, value, n, Of, Perc)
hdl_low <- mets_comp2 %>%
filter(!is.na(low_hdl)) %>%
group_by(low_hdl) %>%
summarise(n=n()) %>%
mutate(Var = "Low HDL",
value = low_hdl,
P=n/sum(n),
Of=sum(n),
Perc = P*100) %>%
select(Var, value, n, Of, Perc)
gluc_elev <- mets_comp2 %>%
filter(!is.na(elev_gluc)) %>%
group_by(elev_gluc) %>%
summarise(n=n()) %>%
mutate(Var = "Elevated Fasting Glucose",
value = elev_gluc,
P=n/sum(n),
Of=sum(n),
Perc = P*100) %>%
select(Var, value, n, Of, Perc)
bp_elev <- mets_comp2 %>%
filter(!is.na(elev_bp)) %>%
group_by(elev_bp) %>%
summarise(n=n()) %>%
mutate(Var = "Elevated Blood Pressure",
value = elev_bp,
P=n/sum(n),
Of=sum(n),
Perc = P*100) %>%
select(Var, value, n, Of, Perc)
binarydf <- mets_binary %>%
filter(value == 1) %>%
select(Var, n, Of, Perc)
binarydf %>%
kbl(col.names = c("Var", "n (Yes)", "Total", "Proportion (%)"),
caption = "MetS Components - Proportions",
digits = c(0, 0, 0, 2)) %>%
kable_classic_2(font_size = 18,
html_font = "Cambria",
full_width = T)
| Var | n (Yes) | Total | Proportion (%) |
|---|---|---|---|
| Large Waist Circumference | 1411 | 2419 | 58.33 |
| Elevated Triglycerides | 453 | 2419 | 18.73 |
| Low HDL | 642 | 2419 | 26.54 |
| Elevated Fasting Glucose | 1615 | 2419 | 66.76 |
| Elevated Blood Pressure | 916 | 2419 | 37.87 |
3. What is the mean sleep duration of participants
who had not met any of the criteria from the previous question, met
exactly 1 of the criteria, exactly 2, 3, 4 or 5? Hint: R can interpret
logical indicators (TRUE or FALSE) as numeric values, where FALSE=0 and
TRUE=1, when mathematical operations (e.g., addition/subtraction) are
applied to them. (5 points)
mets_sleep_q3 <- mets_sleep %>%
select(sleep, waist_large, elev_trigl, low_hdl, elev_gluc, elev_bp, MetS) %>%
mutate(mets_score = sum(waist_large, elev_trigl, low_hdl, elev_gluc, elev_bp))
mets_sleep_q3 %>%
filter(!is.na(mets_score)) %>%
group_by(mets_score) %>%
summarise(mean_sleep = mean(sleep),
sd = sd(sleep),
count = n()) %>%
select(mets_score, count, mean_sleep, sd) %>%
kbl(col.names = c("# MetS Criteria Met", "n", "Mean daily sleep hours", "SD"),
caption = "Mean Sleep Hours by number of MetS criteria met",
digits = c(0, 0, 4, 4),
align= "cc",
booktabs=TRUE) %>%
kable_classic_2(font_size = 18,
html_font = "Cambria",
full_width = T)
| # MetS Criteria Met | n | Mean daily sleep hours | SD |
|---|---|---|---|
| 0 | 289 | 7.6038 | 1.3660 |
| 1 | 541 | 7.3725 | 1.5149 |
| 2 | 684 | 7.4306 | 1.4535 |
| 3 | 556 | 7.4110 | 1.5739 |
| 4 | 285 | 7.4474 | 1.4574 |
| 5 | 64 | 7.3984 | 1.5512 |
4. Repeat the analysis in the previous question but instead compute the proportion of participants who met at least 1, at least 2, at least 3, and at least 4 criteria. (5 points)
mets_sleep_q4 <- mets_sleep_q3 %>%
mutate(mets_1 = ifelse(mets_score >= 1, 1, 0),
mets_2 = ifelse(mets_score >= 2, 1, 0),
mets_3 = ifelse(mets_score >= 3, 1, 0),
mets_4 = ifelse(mets_score >= 4, 1, 0))
mets_one <- mets_sleep_q4 %>%
filter(mets_1 == 1) %>%
ungroup() %>%
summarize(mean_sleep = mean(sleep),
sd = sd(sleep),
count = n()) %>%
mutate(mets_score2 = "1 or more") %>%
select(mets_score2, count, mean_sleep, sd)
mets_two <- mets_sleep_q4 %>%
filter(mets_2 == 1) %>%
ungroup() %>%
summarize(mean_sleep = mean(sleep),
sd = sd(sleep),
count = n()) %>%
mutate(mets_score2 = "2 or more") %>%
select(mets_score2, count, mean_sleep, sd)
mets_three <- mets_sleep_q4 %>%
filter(mets_3 == 1) %>%
ungroup() %>%
summarize(mean_sleep = mean(sleep),
sd = sd(sleep),
count = n()) %>%
mutate(mets_score2 = "3 or more") %>%
select(mets_score2, count, mean_sleep, sd)
mets_four <- mets_sleep_q4 %>%
filter(mets_4 == 1) %>%
ungroup() %>%
summarize(mean_sleep = mean(sleep),
sd = sd(sleep),
count = n()) %>%
mutate(mets_score2 = "4 or more") %>%
select(mets_score2, count, mean_sleep, sd)
mets_atleast %>%
kbl(col.names = c("# MetS Criteria Met", "n", "Mean daily sleep hours", "SD"),
digits = c(0, 0, 4, 4),
align= "cccc",
booktabs=TRUE) %>%
kable_classic_2(font_size = 18,
html_font = "Cambria",
full_width = T)
| # MetS Criteria Met | n | Mean daily sleep hours | SD |
|---|---|---|---|
| 1 or more | 2130 | 7.4120 | 1.5036 |
| 2 or more | 1589 | 7.4254 | 1.5000 |
| 3 or more | 905 | 7.4215 | 1.5350 |
| 4 or more | 349 | 7.4384 | 1.4729 |
sens <- mets_sleep_q3 %>%
select(MetS, waist_large, low_hdl) %>%
mutate(tp1 = ifelse(waist_large == 1 & MetS == "MetS+", 1, 0),
fn1 = ifelse(waist_large == 0 & MetS == "MetS+", 1, 0),
fp1 = ifelse(waist_large == 1 & MetS == "MetS-", 1, 0),
tn1 = ifelse(waist_large == 0 & MetS == "MetS-", 1, 0),
tp2 = ifelse((waist_large == 1 | low_hdl == 1) & MetS == "MetS+", 1, 0),
fn2 = ifelse((waist_large == 0 & low_hdl == 0) & MetS == "MetS+", 1, 0),
fp2 = ifelse((waist_large == 1 | low_hdl == 1) & MetS == "MetS-", 1, 0),
tn2 = ifelse((waist_large == 0 & low_hdl == 0) & MetS == "MetS-", 1, 0),
tp3 = ifelse((waist_large == 1 & low_hdl == 1) & MetS == "MetS+", 1, 0),
fn3 = ifelse((waist_large == 0 | low_hdl == 0) & MetS == "MetS+", 1, 0),
fp3 = ifelse((waist_large == 1 & low_hdl == 1) & MetS == "MetS-", 1, 0),
tn3 = ifelse((waist_large == 0 | low_hdl == 0) & MetS == "MetS-", 1, 0))
tp1_sum = sum(sens$tp1, na.rm=TRUE)
fn1_sum = sum(sens$fn1, na.rm=TRUE)
fp1_sum = sum(sens$fp1, na.rm=TRUE)
tn1_sum = sum(sens$tn1, na.rm=TRUE)
sen1 = tp1_sum / (tp1_sum + fn1_sum)
spec1 = tn1_sum / (tn1_sum + fp1_sum)
tp2_sum = sum(sens$tp2, na.rm=TRUE)
fn2_sum = sum(sens$fn2, na.rm=TRUE)
fp2_sum = sum(sens$fp2, na.rm=TRUE)
tn2_sum = sum(sens$tn2, na.rm=TRUE)
sen2 = tp2_sum / (tp2_sum + fn2_sum)
spec2 = tn2_sum / (tn2_sum + fp2_sum)
tp3_sum = sum(sens$tp3, na.rm=TRUE)
fn3_sum = sum(sens$fn3, na.rm=TRUE)
fp3_sum = sum(sens$fp3, na.rm=TRUE)
tn3_sum = sum(sens$tn3, na.rm=TRUE)
sen3 = tp3_sum / (tp3_sum + fn3_sum)
spec3 = tn3_sum / (tn3_sum + fp3_sum)
Standard = meets the definition of 3 of the criteria
Sensitivity and Specificity of each alternate test compared to the standard:
Method 1: MetS+ based on elevated waste size as only criteria
Sensitivity = 89.17%
Specificity = 60.11%
Sensitivity = 96.91%
Specificity = 54.95%
Sensitivity = 47.85%
Specificity = 95.97%
6. The packages table1 and TableOne contain functions to create tables like Table 1 in the Smiley paper. In general, a “Table 1” (which may not necessary be the first table) numerically describes the data used for analysis. Use either package to create a Table 1 similar to that in the Smiley paper. However, you may omit p-values for this exercise and ignore the Sitting (minutes/day) variable. (10 points)
labels <- list(
variables=list(sex="Sex, n(%)",
race = "Race/Ethnicity, n(%)",
marital = "Marital Status, n(%)",
education = "Education, n(%)",
dia = "Diastolic Blood Pressure (mmHg)",
sys = "Systolic Blood Pressure (mmHg)",
sit_time = "Sitting (Minutes/Day)",
sleep = "Sleep Duration (Hours)",
glucose_lab = "Fasting Blood Glucose (mg/dL)",
hdl_lab = "High Density Lipoprotein (mg/dL)",
age="Age (Years)",
waist_size = "Waist Circumference (cm)"),
groups=list("", "Metabolic Syndrome"))
# Remove MetS from the name of the levels of the groups
levels(mets_sleep$MetS) <- c("Yes", "No")
caption <- "Participants - NHANES per Metabolic Syndrome"
strata <- c(list(Total=mets_sleep), split(mets_sleep, mets_sleep$MetS))
my.render.cont <- function(x) {
with(stats.apply.rounding(stats.default(x), digits=4, digits.pct=2), c("",
"Mean (SD)"=sprintf("%s (%s)", MEAN, SD)))
}
my.render.cat <- function(x) {
c("", sapply(stats.default(x), function(y) with(y,
sprintf("%d (%0.2f%%)", FREQ, PCT))))
}
table1(strata, labels, groupspan = c(1, 2),
caption=caption,
#footnote=footnote,
render.continuous=my.render.cont, render.categorical=my.render.cat)
Metabolic Syndrome |
|||
|---|---|---|---|
| Total (N=2419) |
Yes (N=905) |
No (N=1514) |
|
| Sex, n(%) | |||
| Male | 1179 (48.74%) | 404 (44.64%) | 775 (51.19%) |
| Female | 1240 (51.26%) | 501 (55.36%) | 739 (48.81%) |
| Race/Ethnicity, n(%) | |||
| Hispanic | 561 (23.19%) | 255 (28.18%) | 306 (20.21%) |
| White | 800 (33.07%) | 311 (34.36%) | 489 (32.30%) |
| Black | 578 (23.89%) | 179 (19.78%) | 399 (26.35%) |
| Asian | 350 (14.47%) | 104 (11.49%) | 246 (16.25%) |
| Multiracial (Other) | 130 (5.37%) | 56 (6.19%) | 74 (4.89%) |
| Marital Status, n(%) | |||
| Married or Living with partner | 1418 (58.62%) | 530 (58.56%) | 888 (58.65%) |
| Widowed | 181 (7.48%) | 87 (9.61%) | 94 (6.21%) |
| Divorced or Separated | 387 (16.00%) | 179 (19.78%) | 208 (13.74%) |
| Never Married | 433 (17.90%) | 109 (12.04%) | 324 (21.40%) |
| Education, n(%) | |||
| < 9th grade | 199 (8.23%) | 101 (11.16%) | 98 (6.47%) |
| 9th-11th grade | 291 (12.03%) | 132 (14.59%) | 159 (10.50%) |
| High School | 569 (23.52%) | 226 (24.97%) | 343 (22.66%) |
| Some College | 783 (32.37%) | 283 (31.27%) | 500 (33.03%) |
| >= College Graduate | 576 (23.81%) | 163 (18.01%) | 413 (27.28%) |
| Missing | 1 (0.0%) | 0 (0%) | 1 (0.1%) |
| Diastolic Blood Pressure (mmHg) | |||
| Mean (SD) | 72.62 (12.44) | 75.51 (13.53) | 70.90 (11.41) |
| Systolic Blood Pressure (mmHg) | |||
| Mean (SD) | 127.0 (20.24) | 136.4 (20.68) | 121.3 (17.71) |
| Sitting (Minutes/Day) | |||
| Mean (SD) | 366.0 (621.6) | 402.1 (809.5) | 344.4 (473.7) |
| Missing | 3 (0.1%) | 0 (0%) | 3 (0.2%) |
| Sleep Duration (Hours) | |||
| Mean (SD) | 7.435 (1.489) | 7.422 (1.535) | 7.443 (1.461) |
| Fasting Blood Glucose (mg/dL) | |||
| Mean (SD) | 114.3 (36.61) | 129.2 (46.42) | 105.4 (25.32) |
| High Density Lipoprotein (mg/dL) | |||
| Mean (SD) | 53.62 (15.12) | 46.30 (12.16) | 57.99 (15.03) |
| Age (Years) | |||
| Mean (SD) | 51.46 (17.56) | 56.30 (15.57) | 48.58 (18.05) |
| Waist Circumference (cm) | |||
| Mean (SD) | 100.3 (16.51) | 109.7 (14.85) | 94.59 (14.76) |