library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
data = readRDS(file = "nhanesMetS_version 2 (1).rds")
histogram =
data %>%
ggplot(aes(x = sys)) +
geom_histogram(aes(y = ..density..),
fill = "red", color = "black", binwidth = 2) +
geom_density(color = "blue", linetype = 2)
histogram
The plot conveys a bell-shaped distribution with a very high peak.
density1 =
data %>%
ggplot(aes(x = sys)) +
geom_density(aes(fill = mets.ind, linetype = mets.ind),
color = "black",
linewidth = 1,
alpha = 0.1) +
xlab("Systolic Blood Pressure") +
ylab(NULL) +
scale_x_continuous(trans = "log",
breaks = c(10, 20, 30, 40, 50)) +
scale_fill_manual(values = c("lightblue", "lightpink")) +
theme_classic()
density1
Based on the plot, stratified by MetS status, shows that individuals without MetS are on average have lower systolic blood pressure although the difference is not too significant.
density2 =
data %>%
group_by(mets.ind) %>%
mutate(m = mean(sys, na.rm = T)) %>%
ungroup %>%
ggplot(aes(x = sys)) +
geom_density(aes(fill = mets.ind, linetype = mets.ind),
color = "black",
linewidth = 1,
alpha = 0.1) +
geom_vline(aes(xintercept = m, color = mets.ind),
linetype = 2,
size =2) +
xlab("Systolic Blood Pressure") +
ylab(NULL) +
scale_x_continuous(trans = "log",
breaks = c(10, 20, 30, 40, 50)) +
scale_fill_manual(values = c("lightblue", "lightpink")) +
theme_classic()
density2
As seen in the visualization, the mean for MetS- and MetS+ are right around the peaks of each density plot.
combined_viz =
grid.arrange(histogram,
arrangeGrob(density1,
density2,
ncol = 2),
heights = c(2, 1))
Computing the proportion of MetS+ for each value of sleep and its standard error (SE).
mets_sleep =
data %>%
group_by(sleep) %>%
summarise(
total_count = n(),
mets_count = sum(mets.ind == "MetS+"),
proportion = mets_count / total_count,
standard_error = (proportion * (1 - proportion) / total_count))
mets_sleep
## # A tibble: 25 × 5
## sleep total_count mets_count proportion standard_error
## <dbl> <int> <int> <dbl> <dbl>
## 1 5 108 34 0.315 0.00200
## 2 5.25 19 5 0.263 0.0102
## 3 5.5 90 25 0.278 0.00223
## 4 5.75 27 6 0.222 0.00640
## 5 6 215 43 0.2 0.000744
## 6 6.25 50 9 0.18 0.00295
## 7 6.5 267 62 0.232 0.000668
## 8 6.75 111 24 0.216 0.00153
## 9 7 533 115 0.216 0.000317
## 10 7.25 144 39 0.271 0.00137
## # ℹ 15 more rows
metssleep_viz =
mets_sleep %>%
ggplot(aes(x = sleep,
y = proportion)) +
geom_point() +
geom_errorbar(aes(ymin = proportion - standard_error,
ymax = proportion + standard_error),
width = 0.1) +
labs(x = "Sleep Duration (hrs)",
y = "Proportion of MetS+",
title = "Relationship between MetS+ and Sleep Duration") +
theme_classic()
metssleep_viz
Based on the visualization, there seems to be a downward trend between the proportions of MetS+ and sleep duration.
data %>%
group_by(sleep) %>%
mutate(
total_count = n(),
mets_count = sum(mets.ind == "MetS+"),
proportion = mets_count / total_count,
standard_error = (proportion * (1 - proportion) / total_count)) %>%
ggplot(aes(x = sleep,
y = proportion,
color = gender)) +
geom_point() +
geom_errorbar(aes(ymin = proportion - standard_error,
ymax = proportion + standard_error),
width = 0.1,
position = position_dodge(width = 0.2)) +
labs(x = "Sleep Duration (hrs)",
y = "Proportion of MetS+",
title = "Relationship between MetS+ and Sleep Duration, by gender",
color = "gender") +
theme_classic()
Based on the plot, it seems as if females get more sleep on average and are less likely to be MetS+.
data %>%
group_by(sleep) %>%
mutate(
total_count = n(),
mets_count = sum(mets.ind == "MetS+"),
proportion = mets_count / total_count,
standard_error = (proportion * (1 - proportion) / total_count)) %>%
ggplot(aes(x = sleep,
y = proportion)) +
geom_point() +
geom_errorbar(aes(ymin = proportion - standard_error,
ymax = proportion + standard_error),
width = 0.1) +
labs(x = "Sleep Duration (hrs)",
y = "Proportion of MetS+",
title = "Relationship between MetS+ and Sleep Duration, by sex") +
facet_wrap(~ gender, ncol = 1) +
theme_classic()
pivot_data =
data %>%
pivot_longer(cols = c("crit1", "crit2", "crit3", "crit4", "crit5"),
names_to = "criterion",
values_to = "met_criterion")
criterion_data =
pivot_data %>%
group_by(criterion, met_criterion) %>%
summarise(
total_count = n(),
mets_count = sum(mets.ind == "MetS+"),
proportion = mets_count / total_count,
standard_error = (proportion * (1 - proportion) / total_count),
mean_age = mean(age, na.rm = T),
sys = mean(sys)) %>%
filter(met_criterion == 1) %>%
select(-met_criterion)
## `summarise()` has grouped output by 'criterion'. You can override using the
## `.groups` argument.
criterion_data
## # A tibble: 5 × 7
## # Groups: criterion [5]
## criterion total_count mets_count proportion standard_error mean_age sys
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 crit1 2147 976 0.455 0.000115 53.3 130.
## 2 crit2 450 322 0.716 0.000452 52.7 131.
## 3 crit3 1405 872 0.621 0.000168 51.1 129.
## 4 crit4 1973 772 0.391 0.000121 59.9 143.
## 5 crit5 1503 682 0.454 0.000165 55.4 131.
# I was struggling to find a way to incorporate MetS+ and MetS-,
# so for this last part of the assignment, I took those that had met the criterion
ggplot(criterion_data, aes(x = criterion,
y = proportion)) +
geom_point(aes(size = mean_age,
color = sys)) +
geom_errorbar(aes(ymin = proportion - standard_error,
ymax = proportion + standard_error,
color = sys),
width = 0.2) +
geom_hline(yintercept = mean(criterion_data$proportion),
linetype = "dotted",
color = "black") +
scale_x_discrete(labels = c("Criterion 1",
"Criterion 2",
"Criterion 3",
"Criterion 4",
"Criterion 5")) +
theme(axis.text.x = element_text(angle = 45,
hjust = 1)) +
xlab("Criterion") +
ylab("Proportion") +
theme_classic() +
scale_color_gradient(low = "lightblue", high = "lightpink") +
scale_size_continuous(range = c(3, 8)) +
guides(size = FALSE)