# Histogram of systolic blood pressure (`sys`) drawn on the density scale,
# overlaid with a kernel density curve and a dashed vertical line at the
# overall mean. The mean also replaces the 125 tick on the x-axis.
hist_sys <- mets_sleep %>%
  ggplot(aes(x = sys)) +
  # after_stat(density) is the supported replacement for the `..density..`
  # notation, which was deprecated in ggplot2 3.4.0.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 15,
    color = "black",
    fill = "orange",
    alpha = .5
  ) +
  geom_density() +
  geom_vline(
    aes(xintercept = mean(sys)),
    color = "orange",
    linewidth = 1.5,
    linetype = "dashed"
  ) +
  theme_fivethirtyeight() +
  scale_x_continuous(
    breaks = c(50, 75, 100, mean(mets_sleep$sys), 150, 175, 200, 225),
    # Round the tick labels (including the mean) to whole numbers.
    labels = scales::number_format(accuracy = 1)
  )
hist_sys
# Base plot: density of systolic blood pressure with one filled curve per
# MetS level; the legend sits at the bottom and has no title.
dens_sys <- ggplot(mets_sleep, aes(x = sys, fill = MetS)) +
  geom_density(color = "black", alpha = .25) +
  theme_fivethirtyeight() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )

# Variant with evenly spaced x-axis ticks from 50 to 225 in steps of 25,
# labelled as whole numbers.
dens_sys1 <- dens_sys +
  scale_x_continuous(
    breaks = seq(50, 225, by = 25),
    labels = scales::number_format(accuracy = 1)
  )
dens_sys1
# Mean systolic blood pressure within each MetS level.
temp <- summarise(
  group_by(mets_sleep, MetS),
  Mean = mean(sys)
)

# Variant of the density plot with a dashed vertical line at each group
# mean (coloured by MetS, kept out of the legend). The group means take the
# place of the 125 tick on the x-axis.
dens_sys2 <- dens_sys +
  geom_vline(
    data = temp,
    mapping = aes(xintercept = Mean, color = MetS),
    linetype = "dashed",
    lwd = 1.5,
    show.legend = FALSE
  ) +
  scale_x_continuous(
    breaks = c(50, 75, 100, temp$Mean, 150, 175, 200, 225),
    labels = scales::number_format(accuracy = 1)
  )
dens_sys2
# Compose the overall histogram with the two side-by-side density plots in
# a two-row layout, collect the legends, and add a centred title.
# `&` binds more loosely than `+`, so the final theme() is applied to the
# whole composition (every plot in it), not just the last element.
combine <- hist_sys +
  (dens_sys1 | dens_sys2) +
  plot_annotation(
    title = 'Density Distribution of Systolic Blood Pressure',
    subtitle = 'Overall with Mean',
    theme = theme(plot.title = element_text(hjust = 0.5))
  ) +
  plot_layout(nrow = 2, guides = "collect") &
  theme(legend.position = "bottom")
combine
Compute the proportion of MetS+ respondents at each sleep duration (in
hours) present in the data. Also, compute the standard error of each
proportion: if the proportion is p and n is the number of respondents in
that sleep-duration group, then the standard error (SE) is √(p(1-p)/n).
For this question, you will need to first round the hours of sleep
variable to the nearest whole number and then convert it into a
categorical variable.
# Per-sleep-duration summary: group size, number of MetS+ respondents,
# proportion MetS+ and its standard error sqrt(p * (1 - p) / n).
# Sleep is rounded to a whole number and turned into a factor with levels
# "2" through "10"; rounded values outside those levels become NA.
mets_sleep_q2a <- mets_sleep %>%
  mutate(
    sleep = factor(round(sleep), levels = as.character(2:10))
  ) %>%
  group_by(sleep) %>%
  summarise(
    n = n(),
    nMetS = sum(MetS == "MetS+"),
    p = nMetS / n,
    se = sqrt(p * (1 - p) / n),
    .groups = "drop"
  )
Plot as points the proportions of MetS+ and add error bars that signify the SEs of these proportions. Sleep duration (categorical) goes on the x-axis here. Briefly describe the relationship between the prevalence of MetS and sleep duration. (5 points)
# Proportion of MetS+ at each (categorical) sleep duration, drawn as a point
# with an error bar reaching one standard error either side of it.
mets_sleep_q2a %>%
  ggplot(aes(x = sleep, y = p)) +
  # geom_pointrange() draws both the point and the interval, so no separate
  # geom_point() layer is needed. The lower bound is p - se and the upper
  # bound is p + se, matching the sex-stratified plot of the same quantities.
  geom_pointrange(aes(ymin = p - se, ymax = p + se)) +
  theme_bw()
Stratify the above plot by sex using different colors for both the points and error bars. Is the relationship between MetS+ prevalence and sleep duration sex-specific, that is, do we see differences in the association in males and females? (5 points)
Perform a similar analysis as part b). Here instead, use faceting to stratify by sex. (5 points)
# Same summary as for the unstratified analysis, but computed within each
# combination of rounded sleep duration and sex: group size, number of
# MetS+ respondents, proportion MetS+ and its standard error.
mets_sleep_q2b <- mets_sleep %>%
  mutate(
    sleep = factor(round(sleep), levels = as.character(2:10))
  ) %>%
  group_by(sleep, sex) %>%
  summarise(
    n = n(),
    nMetS = sum(MetS == "MetS+"),
    p = nMetS / n,
    se = sqrt(p * (1 - p) / n),
    .groups = "drop"
  )
# Proportion of MetS+ (with +/- one SE) by sleep duration, one facet per
# sex stacked in a single column. Points and intervals are coloured by sex
# with the legend suppressed, and the axes are flipped so sleep duration
# runs vertically.
ggplot(mets_sleep_q2b, aes(x = sleep, y = p)) +
  geom_pointrange(
    aes(ymin = p - se, ymax = p + se, color = sex),
    show.legend = FALSE
  ) +
  facet_wrap(~sex, ncol = 1) +
  scale_y_continuous(
    limits = c(0, 0.75),
    breaks = seq(0, 0.7, 0.1)
  ) +
  coord_flip() +
  theme_bw()
# Long-format summary of the five MetS criteria by MetS status.
# For each MetS level (factor order reversed with fct_rev) this computes the
# group size, the sum of each criterion column, the mean age (rounded to two
# decimals) and the mean systolic blood pressure. The criterion sums are then
# pivoted to one row per MetS level x criterion, and the proportion `p` and
# its standard error sqrt(p * (1 - p) / count) are added.
# NOTE(review): summing the criterion columns assumes they are 0/1 or
# logical indicators with no missing values; confirm against the data.
mets_sleep_q3 <- mets_sleep %>%
  mutate(MetS = fct_rev(MetS)) %>%
  group_by(MetS) %>%
  summarise(
    count = n(),
    `Elevated Waist Circumference` = sum(waist_large),
    `Elevated Triglycerides` = sum(elev_trigl),
    `Low HDL` = sum(low_hdl),
    `Elevated Fasting Glucose` = sum(elev_gluc),
    `Elevated Blood Pressure` = sum(elev_bp),
    mean_age = round(mean(age), digits = 2),
    mean_SBP = mean(sys)
  ) %>%
  pivot_longer(
    cols = `Elevated Waist Circumference`:`Elevated Blood Pressure`,
    names_to = "criterion",
    values_to = "n"
  ) %>%
  mutate(
    p = n / count,
    se = sqrt(p * (1 - p) / count)
  ) %>%
  # Fix the column order of the final table.
  select(MetS, criterion, n, count, p, se, mean_age, mean_SBP)
# Mean proportion for every criterion x MetS combination; these values are
# used as the y-axis breaks of the criterion plot. Note that this reuses
# (overwrites) the name `temp`.
temp <- summarise(
  group_by(mets_sleep_q3, criterion, MetS),
  Mean = mean(p),
  .groups = "drop"
)
# Proportion of respondents meeting each MetS criterion, by MetS status.
# Each proportion is shown as a label (the MetS level) with an error bar of
# +/- one standard error; a dotted horizontal line and a text annotation
# mark the mean of all plotted proportions.
q3 <- mets_sleep_q3 %>%
  ggplot(aes(x = criterion, y = p)) +
  # `linewidth` (not `size`) controls line thickness since ggplot2 3.4.0.
  geom_errorbar(
    aes(ymin = p - se, ymax = p + se),
    width = .75,
    linewidth = .75
  ) +
  # Two label layers with identical position and text: the second is drawn
  # over the first. Both map `fill`, so the rounded mean SBP values and the
  # rounded mean ages all become keys of the single fill legend.
  geom_label(aes(fill = as_factor(round(mean_SBP, digits = 0)),
                 label = MetS)) +
  geom_label(aes(fill = as_factor(round(mean_age, digits = 0)),
                 label = MetS)) +
  # NOTE(review): four colours presumably cover two SBP keys plus two age
  # keys (one pair per MetS level) -- confirm the key order is as intended.
  scale_fill_manual(values = c("red", "orange", "red", "orange")) +
  geom_hline(aes(yintercept = mean(p)),
             linetype = "dotted", linewidth = 1.5) +
  annotate("text", x = 5.75, y = mean(mets_sleep_q3$p),
           label = paste("mean(p) = ", round(mean(mets_sleep_q3$p), digits=2)),
           hjust = 1, vjust = -.5) +
  # Put the y-axis ticks exactly at the per-criterion, per-status values.
  scale_y_continuous(breaks = temp$Mean,
                     labels = scales::number_format(accuracy = .01)) +
  ggtitle("% of respondents w/ each component of Met. Syndrome",
          subtitle = "Stratified by Metabolic Syndrome Status") +
  labs(y = "Probability for Each MetS Criterion",
       fill = "Mean Systolic BP/Mean Age") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank(),
    axis.text.y = element_text(angle = 45, size = 8),
    panel.grid.major.x = element_line(colour = 'black', linewidth = .25,
                                      linetype = 'dotted'),
    panel.grid.major.y = element_line(colour = 'black', linewidth = .25,
                                      linetype = 'dotted')
  )
q3