library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(broom)
library(tibble)
library(sjPlot)
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(performance)
library(pubh)
## Loading required package: emmeans
## Loading required package: ggformula
## Loading required package: scales
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
##
## Loading required package: ggridges
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: gtsummary
## #BlackLivesMatter
##
## Attaching package: 'gtsummary'
##
## The following objects are masked from 'package:flextable':
##
## as_flextable, continuous_summary
##
## Loading required package: huxtable
##
## Attaching package: 'huxtable'
##
## The following object is masked from 'package:gtsummary':
##
## as_flextable
##
## The following object is masked from 'package:scales':
##
## number_format
##
## The following objects are masked from 'package:performance':
##
## print_html, print_md
##
## The following object is masked from 'package:sjPlot':
##
## font_size
##
## The following objects are masked from 'package:table1':
##
## label, label<-
##
## The following objects are masked from 'package:flextable':
##
## align, as_flextable, bold, font, height, italic, set_caption,
## valign, width
##
## The following object is masked from 'package:dplyr':
##
## add_rownames
##
## The following object is masked from 'package:ggplot2':
##
## theme_grey
##
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(flextable)
library(ggplot2)
library(gtsummary)
data = readRDS(file = "nhanesMetS_version 2 (1).rds")
mets_plus = data %>%
filter(mets.ind == "MetS+")
mets_minus = data %>%
filter(mets.ind == "MetS-")
t_test = t.test(mets_plus$sleep,
mets_minus$sleep) %>%
print()
##
## Welch Two Sample t-test
##
## data: mets_plus$sleep and mets_minus$sleep
## t = -3.8895, df = 1619, p-value = 0.0001045
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.25542827 -0.08417082
## sample estimates:
## mean of x mean of y
## 7.818545 7.988344
Given the fact that the p-value is below 0.05, the null hypothesis is rejected. Thus, there is a statistically significant difference between sleep duration for those with MetS and those without (0.084 v 0.255)
log_reg = glm(mets.ind ~ gender,
data = data,
family = "binomial")
odds_ratio = exp(coef(log_reg))
conf_interval = exp(confint(log_reg))
## Waiting for profiling to be done...
cat("Odds Ratio:",
odds_ratio,
"\n")
## Odds Ratio: 0.02734977 23.223
cat("95% Confidence Interval:",
conf_interval,
"\n")
## 95% Confidence Interval: 0.02140878 18.2442 0.03433066 30.02956
With a calculated odds ratio of 0.0273, along with the confidence interval, there is a significant association between the two variables.
cont_table = table(data$mets.ind, data$gender)
chi_sq = chisq.test(cont_table) %>%
print()
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: cont_table
## X-squared = 1051.4, df = 1, p-value < 2.2e-16
Once again, the p-value is below 0.05 so the null hypothesis is rejected, indicating significant association between the two variables. Furthermore, this is consistent with the findings from Question 2.
model_cat <- glm(mets.ind ~ as.factor(sleep), data = data, family = binomial)
summary_cat <- summary(model_cat)$coefficients[-1, ]
model_num <- glm(mets.ind ~ sleep, data = data, family = binomial)
summary_num <- summary(model_num)$coefficients
result_table <- data.frame(
"Sleep_Category" = rownames(summary_cat),
"OR_Cat" = exp(summary_cat[, 1]),
"P_Value_Cat" = summary_cat[, 4],
"CI_Lower_Cat" = exp(summary_cat[, 1] - 1.96 * summary_cat[, 2]),
"CI_Upper_Cat" = exp(summary_cat[, 1] + 1.96 * summary_cat[, 2]),
"OR_Num" = exp(summary_num[2]),
"P_Value_Num" = summary_num[2, 4],
"CI_Lower_Num" = exp(summary_num[2, 1] - 1.96 * summary_num[2, 2]),
"CI_Upper_Num" = exp(summary_num[2, 1] + 1.96 * summary_num[2, 2])
)
print(result_table)
## Sleep_Category OR_Cat P_Value_Cat CI_Lower_Cat
## as.factor(sleep)5.25 as.factor(sleep)5.25 0.7773109 0.6532090731 0.25902454
## as.factor(sleep)5.5 as.factor(sleep)5.5 0.8371041 0.5706546424 0.45277818
## as.factor(sleep)5.75 as.factor(sleep)5.75 0.6218487 0.3489107116 0.23013461
## as.factor(sleep)6 as.factor(sleep)6 0.5441176 0.0233195200 0.32158457
## as.factor(sleep)6.25 as.factor(sleep)6.25 0.4777618 0.0803502485 0.20876248
## as.factor(sleep)6.5 as.factor(sleep)6.5 0.6582496 0.0981598054 0.40101605
## as.factor(sleep)6.75 as.factor(sleep)6.75 0.6004057 0.0998125277 0.32702920
## as.factor(sleep)7 as.factor(sleep)7 0.5987898 0.0273376850 0.37970396
## as.factor(sleep)7.25 as.factor(sleep)7.25 0.8084034 0.4465805949 0.46747558
## as.factor(sleep)7.5 as.factor(sleep)7.5 0.6588819 0.0715421633 0.41853368
## as.factor(sleep)7.75 as.factor(sleep)7.75 0.6640080 0.1139273686 0.39965715
## as.factor(sleep)8 as.factor(sleep)8 0.4712688 0.0009931007 0.30113870
## as.factor(sleep)8.25 as.factor(sleep)8.25 0.4647672 0.0044383899 0.27415775
## as.factor(sleep)8.5 as.factor(sleep)8.5 0.4951758 0.0032952186 0.30987055
## as.factor(sleep)8.75 as.factor(sleep)8.75 0.3778595 0.0011684955 0.20996654
## as.factor(sleep)9 as.factor(sleep)9 0.5855742 0.0214676056 0.37110081
## as.factor(sleep)9.25 as.factor(sleep)9.25 0.3391902 0.0037623166 0.16323244
## as.factor(sleep)9.5 as.factor(sleep)9.5 0.4525335 0.0030622471 0.26775953
## as.factor(sleep)9.75 as.factor(sleep)9.75 0.4705882 0.0877912137 0.19806135
## as.factor(sleep)10 as.factor(sleep)10 0.4653144 0.0075773944 0.26538496
## as.factor(sleep)10.25 as.factor(sleep)10.25 0.3627451 0.0795868467 0.11674122
## as.factor(sleep)10.5 as.factor(sleep)10.5 0.4352941 0.0176793110 0.21894756
## as.factor(sleep)10.75 as.factor(sleep)10.75 0.2560554 0.0790337395 0.05598252
## as.factor(sleep)11 as.factor(sleep)11 0.5873016 0.1207023299 0.29986621
## CI_Upper_Cat OR_Num P_Value_Num CI_Lower_Num
## as.factor(sleep)5.25 2.3326449 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)5.5 1.5476524 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)5.75 1.6803029 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)6 0.9206412 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)6.25 1.0933783 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)6.5 1.0804869 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)6.75 1.1023082 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)7 0.9442861 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)7.25 1.3979682 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)7.5 1.0372531 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)7.75 1.1032121 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)8 0.7375150 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)8.25 0.7878986 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)8.5 0.7912953 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)8.75 0.6800025 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)9 0.9240001 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)9.25 0.7048232 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)9.5 0.7648152 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)9.75 1.1181045 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)10 0.8158619 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)10.25 1.1271427 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)10.5 0.8654171 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)10.75 1.1711574 0.8972504 8.157831e-05 0.8501358
## as.factor(sleep)11 1.1502568 0.8972504 8.157831e-05 0.8501358
## CI_Upper_Num
## as.factor(sleep)5.25 0.9469761
## as.factor(sleep)5.5 0.9469761
## as.factor(sleep)5.75 0.9469761
## as.factor(sleep)6 0.9469761
## as.factor(sleep)6.25 0.9469761
## as.factor(sleep)6.5 0.9469761
## as.factor(sleep)6.75 0.9469761
## as.factor(sleep)7 0.9469761
## as.factor(sleep)7.25 0.9469761
## as.factor(sleep)7.5 0.9469761
## as.factor(sleep)7.75 0.9469761
## as.factor(sleep)8 0.9469761
## as.factor(sleep)8.25 0.9469761
## as.factor(sleep)8.5 0.9469761
## as.factor(sleep)8.75 0.9469761
## as.factor(sleep)9 0.9469761
## as.factor(sleep)9.25 0.9469761
## as.factor(sleep)9.5 0.9469761
## as.factor(sleep)9.75 0.9469761
## as.factor(sleep)10 0.9469761
## as.factor(sleep)10.25 0.9469761
## as.factor(sleep)10.5 0.9469761
## as.factor(sleep)10.75 0.9469761
## as.factor(sleep)11 0.9469761
data$sleep <- as.numeric(as.character(data$sleep))
prob_cat <- predict(model_cat, type = "response", se.fit = TRUE)
lower_ci_cat <- prob_cat$fit - 1.96 * prob_cat$se.fit
upper_ci_cat <- prob_cat$fit + 1.96 * prob_cat$se.fit
plot_data_cat <- data.frame(
sleep = data$sleep,
MetS_prob = prob_cat$fit,
lower_ci = lower_ci_cat,
upper_ci = upper_ci_cat
)
ggplot(plot_data_cat, aes(x = sleep, y = MetS_prob)) +
geom_point() +
geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.1) +
labs(title = "Predicted Proportion of MetS+ by Sleep Duration (Categorical Model)",
x = "Sleep Duration (Hours)",
y = "Predicted Proportion of MetS+") +
theme_minimal()
## Part (c)
data$sleep <- as.numeric(as.character(data$sleep))
prob_cat <- predict(model_cat, type = "response", se.fit = TRUE)
lower_ci_cat <- prob_cat$fit - 1.96 * prob_cat$se.fit
upper_ci_cat <- prob_cat$fit + 1.96 * prob_cat$se.fit
plot_data_cat <- data.frame(
sleep = data$sleep,
MetS_prob = prob_cat$fit,
lower_ci = lower_ci_cat,
upper_ci = upper_ci_cat,
model = "Categorical"
)
prob_num <- predict(model_num, type = "response", se.fit = TRUE)
lower_ci_num <- prob_num$fit - 1.96 * prob_num$se.fit
upper_ci_num <- prob_num$fit + 1.96 * prob_num$se.fit
plot_data_num <- data.frame(
sleep = data$sleep,
MetS_prob = prob_num$fit,
lower_ci = lower_ci_num,
upper_ci = upper_ci_num,
model = "Numeric"
)
plot_data <- rbind(plot_data_cat, plot_data_num)
ggplot(plot_data, aes(x = sleep, y = MetS_prob, color = model)) +
geom_point() +
geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.1) +
labs(title = "Model-Predicted Values and Error Bars",
x = "Sleep Duration",
y = "Model-Predicted Probability") +
theme_minimal() +
scale_color_manual(values = c("Categorical" = "blue", "Numeric" = "red"))
I could not figure out how to complete this question, I apologize.
model <- lm(sys ~ sleep + gender + ethnicity + married + edu + glu, data = data)
residuals <- resid(model)
histogram <- ggplot(data.frame(residuals), aes(x = residuals)) +
geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
labs(title = "Histogram of Residuals", x = "Residuals", y = "Frequency")
qq_plot <- ggplot(data.frame(residuals), aes(sample = residuals)) +
geom_qq_line(color = "blue") +
geom_qq(color = "black") +
labs(title = "Q-Q Plot of Residuals", x = "Theoretical Quantiles", y = "Sample Quantiles")
grid.arrange(histogram, qq_plot, ncol = 2)
Based on the bell-shaped nature of the plot, it can be concluded that
the residuals are approximately “normally distributed”, with little
outliers.
model <- lm(sys ~ sleep + gender + ethnicity + married + edu + glu, data = data)
summary_table <- summary(model)$coefficients[-1, ]
result_table <- data.frame(
"Covariate" = rownames(summary_table),
"Estimate" = summary_table[, 1],
"CI_Lower" = summary_table[, 1] - 1.96 * summary_table[, 2],
"CI_Upper" = summary_table[, 1] + 1.96 * summary_table[, 2],
"p-value" = summary_table[, 4]
)
print(result_table)
## Covariate Estimate CI_Lower CI_Upper p.value
## sleep sleep 0.11539776 -0.35278329 0.58357881 6.290690e-01
## genderMale genderMale 1.47302498 0.30137146 2.64467851 1.380700e-02
## ethnicity ethnicity 0.80407328 0.27932703 1.32881954 2.699434e-03
## married married 2.01243965 1.36603886 2.65884043 1.226250e-09
## edu edu -1.67098775 -2.17599392 -1.16598158 1.081498e-10
## glu glu 0.05435914 0.03837193 0.07034635 3.321479e-11
The analysis reveals that married individuals exhibit higher SBP compared to unmarried individuals and higher education is associated with lower SBP. Additionally, the effect of sleep duration on SBP lacks any statistical significance.