#RESEARCH QUESTION: What individual and behavioral factors are associated with the number of mentally unhealthy days reported by U.S. adults, and does the association between current smoking and mental health differ by sex?
#Loading required packages
library(tidyverse)
library(haven)
library(broom)
library(kableExtra)
library(car)
library(ggeffects)
library(gtsummary)#importing LLCP2023.XPT using read_xpt().
# Load the 2023 BRFSS file (LLCP2023.XPT) with haven::read_xpt().
brfss_raw <- read_xpt(
  file = "C:/Users/userp/OneDrive/Рабочий стол/HSTA553/R files/LLCP2023.XPT"
)
# Row and column counts of the raw data.
dim(brfss_raw) ## [1] 433323 350
## [1] 433323
## [1] 350
#8-11 from Part 0
# Keep only the nine raw BRFSS columns that are recoded in the next step.
brfss_subset <- select(
  brfss_raw,
  all_of(c(
    "MENTHLTH", "PHYSHLTH", "_BMI5", "SEXVAR", "EXERANY2",
    "_AGEG5YR", "_INCOMG1", "EDUCA", "_SMOKER3"
  ))
)
# Recode a BRFSS "number of days" item: code 88 is mapped to 0 days, values
# 0-30 are kept as they are, and every other value (including 77, 99 and
# missing) becomes NA.
recode_days <- function(raw) {
  days <- as.numeric(raw)
  case_when(
    days == 88 ~ 0,
    days >= 0 & days <= 30 ~ days,
    TRUE ~ NA_real_
  )
}

# Build the analysis variables from the raw BRFSS codes. Each categorical
# variable becomes a factor whose first level is the reference category in the
# regression models; any code that is not listed in `levels` becomes NA.
brfss_clean <- brfss_subset %>%
  mutate(
    menthlth_days = recode_days(MENTHLTH),
    physhlth_days = recode_days(PHYSHLTH),
    # _BMI5 is divided by 100 to give BMI; 9999 is treated as missing.
    bmi = na_if(as.numeric(`_BMI5`), 9999) / 100,
    sex = factor(SEXVAR, levels = c(1, 2), labels = c("Male", "Female")),
    exercise = factor(EXERANY2, levels = c(1, 2), labels = c("Yes", "No")),
    age_group = factor(
      `_AGEG5YR`,
      levels = 1:13,
      labels = c(
        "18-24", "25-29", "30-34", "35-39", "40-44", "45-49",
        "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"
      )
    ),
    income = factor(
      `_INCOMG1`,
      levels = 1:7,
      labels = c(
        "Less than 15000",
        "15000 to less than 25000",
        "25000 to less than 35000",
        "35000 to less than 50000",
        "50000 to less than 100000",
        "100000 to less than 200000",
        "200000 or more"
      )
    ),
    # Codes 1 and 2 share one label, so they collapse into a single level.
    # NOTE(review): the BRFSS codebook appears to describe EDUCA 3 as "some
    # high school", 4 as "high school graduate", 5 as "some college" and 6 as
    # "college graduate" -- confirm these labels against the 2023 codebook
    # and the assignment instructions.
    education = factor(
      EDUCA,
      levels = 1:6,
      labels = c(
        "Less than high school",
        "Less than high school",
        "High school diploma or GED",
        "Some college or technical school",
        "College graduate",
        "Graduate or professional degree"
      )
    ),
    smoking = factor(
      `_SMOKER3`,
      levels = 1:4,
      labels = c(
        "Current daily smoker",
        "Current some-day smoker",
        "Former smoker",
        "Never smoker"
      )
    )
  )
# Summarise missingness in a vector.
#
# x: any vector.
# Returns a one-row tibble with `n_missing` (the number of NA values) and
# `pct_missing` (the share of NA values, in percent).
missing_summary <- function(x) {
  na_flags <- is.na(x)
  tibble(
    n_missing = sum(na_flags),
    pct_missing = 100 * mean(na_flags)
  )
}
missing_summary(brfss_clean$menthlth_days) ## # A tibble: 1 × 2
## n_missing pct_missing
## <int> <dbl>
## 1 8108 1.87
## # A tibble: 1 × 2
## n_missing pct_missing
## <int> <dbl>
## 1 10785 2.49
## # A tibble: 1 × 2
## n_missing pct_missing
## <int> <dbl>
## 1 23062 5.32
# Fix the random seed so the same 8,000 rows are drawn on every run.
set.seed(1220)
# Keep rows that are complete on all nine analysis variables, then draw a
# simple random sample of 8,000 of them.
brfss_analytic <- slice_sample(
  drop_na(
    brfss_clean,
    all_of(c(
      "menthlth_days", "physhlth_days", "bmi", "sex", "exercise",
      "age_group", "income", "education", "smoking"
    ))
  ),
  n = 8000
)
nrow(brfss_analytic) ## [1] 8000
# Table 1: descriptive statistics of the analytic sample, stratified by sex.
# `include` restricts the table to the recoded analysis variables. Without it
# tbl_summary() also summarises the raw BRFSS columns that are still in
# brfss_analytic (MENTHLTH, PHYSHLTH, `_BMI5`, ...), whose special codes
# (e.g. 88) and scaling make those rows meaningless.
tbl1 <- tbl_summary(
  brfss_analytic,
  by = sex,
  include = c(
    menthlth_days, physhlth_days, bmi, exercise,
    age_group, income, education, smoking
  ),
  # Continuous variables are reported as mean (SD).
  statistic = all_continuous() ~ "{mean} ({sd})",
  missing = "no",
  label = list(
    menthlth_days ~ "Mentally unhealthy days",
    physhlth_days ~ "Physically unhealthy days",
    bmi ~ "Body mass index",
    exercise ~ "Any exercise in past 30 days",
    age_group ~ "Age group",
    income ~ "Annual household income",
    education ~ "Education",
    smoking ~ "Smoking status"
  )
) %>%
  bold_labels()
tbl1
| Characteristic | Male N = 3,936¹ | Female N = 4,064¹ |
|---|---|---|
| NUMBER OF DAYS MENTAL HEALTH NOT GOOD | 61 (37) | 52 (39) |
| NUMBER OF DAYS PHYSICAL HEALTH NOT GOOD | 60 (38) | 55 (39) |
| COMPUTED BODY MASS INDEX | 2,871 (597) | 2,869 (698) |
| SEX OF RESPONDENT | ||
| 1 | 3,936 (100%) | 0 (0%) |
| 2 | 0 (0%) | 4,064 (100%) |
| EXERCISE IN PAST 30 DAYS | ||
| 1 | 3,146 (80%) | 3,094 (76%) |
| 2 | 790 (20%) | 970 (24%) |
| REPORTED AGE IN FIVE-YEAR AGE CATEGORIES | 7.5 (3.6) | 7.9 (3.5) |
| COMPUTED INCOME CATEGORIES | ||
| 1 | 160 (4.1%) | 247 (6.1%) |
| 2 | 271 (6.9%) | 370 (9.1%) |
| 3 | 376 (9.6%) | 495 (12%) |
| 4 | 482 (12%) | 585 (14%) |
| 5 | 1,251 (32%) | 1,260 (31%) |
| 6 | 996 (25%) | 869 (21%) |
| 7 | 400 (10%) | 238 (5.9%) |
| EDUCATION LEVEL | ||
| 1 | 4 (0.1%) | 2 (<0.1%) |
| 2 | 71 (1.8%) | 47 (1.2%) |
| 3 | 130 (3.3%) | 122 (3.0%) |
| 4 | 950 (24%) | 877 (22%) |
| 5 | 1,018 (26%) | 1,120 (28%) |
| 6 | 1,763 (45%) | 1,896 (47%) |
| COMPUTED SMOKING STATUS | ||
| 1 | 339 (8.6%) | 319 (7.8%) |
| 2 | 151 (3.8%) | 117 (2.9%) |
| 3 | 1,207 (31%) | 1,055 (26%) |
| 4 | 2,239 (57%) | 2,573 (63%) |
| Mentally unhealthy days | 4 (8) | 5 (9) |
| Physically unhealthy days | 4 (8) | 5 (9) |
| Body mass index | 29 (6) | 29 (7) |
| Any exercise in past 30 days | 3,146 (80%) | 3,094 (76%) |
| Age group | ||
| 18-24 | 235 (6.0%) | 171 (4.2%) |
| 25-29 | 219 (5.6%) | 189 (4.7%) |
| 30-34 | 253 (6.4%) | 210 (5.2%) |
| 35-39 | 263 (6.7%) | 302 (7.4%) |
| 40-44 | 290 (7.4%) | 292 (7.2%) |
| 45-49 | 266 (6.8%) | 252 (6.2%) |
| 50-54 | 305 (7.7%) | 303 (7.5%) |
| 55-59 | 308 (7.8%) | 352 (8.7%) |
| 60-64 | 408 (10%) | 379 (9.3%) |
| 65-69 | 418 (11%) | 483 (12%) |
| 70-74 | 382 (9.7%) | 426 (10%) |
| 75-79 | 325 (8.3%) | 338 (8.3%) |
| 80+ | 264 (6.7%) | 367 (9.0%) |
| Annual household income | ||
| Less than 15000 | 160 (4.1%) | 247 (6.1%) |
| 15000 to less than 25000 | 271 (6.9%) | 370 (9.1%) |
| 25000 to less than 35000 | 376 (9.6%) | 495 (12%) |
| 35000 to less than 50000 | 482 (12%) | 585 (14%) |
| 50000 to less than 100000 | 1,251 (32%) | 1,260 (31%) |
| 100000 to less than 200000 | 996 (25%) | 869 (21%) |
| 200000 or more | 400 (10%) | 238 (5.9%) |
| Education | ||
| Less than high school | 75 (1.9%) | 49 (1.2%) |
| High school diploma or GED | 130 (3.3%) | 122 (3.0%) |
| Some college or technical school | 950 (24%) | 877 (22%) |
| College graduate | 1,018 (26%) | 1,120 (28%) |
| Graduate or professional degree | 1,763 (45%) | 1,896 (47%) |
| Smoking status | ||
| Current daily smoker | 339 (8.6%) | 319 (7.8%) |
| Current some-day smoker | 151 (3.8%) | 117 (2.9%) |
| Former smoker | 1,207 (31%) | 1,055 (26%) |
| Never smoker | 2,239 (57%) | 2,573 (63%) |
| ¹ Mean (SD); n (%) | ||
##Research question: What is the independent association of each predictor with the number of mentally unhealthy days in the past 30 days?
# Model 1: linear regression of mentally unhealthy days on physically
# unhealthy days, BMI, sex, exercise, and age group.
model1 <- lm(
  formula = menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group,
  data = brfss_analytic
)
# Coefficient table, R-squared, and overall F-test.
summary(model1) ##
## Call:
## lm(formula = menthlth_days ~ physhlth_days + bmi + sex + exercise +
## age_group, data = brfss_analytic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1933 -3.8849 -1.6633 0.3745 30.0200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.063080 0.511138 9.906 < 2e-16 ***
## physhlth_days 0.294315 0.009943 29.602 < 2e-16 ***
## bmi 0.028743 0.013230 2.173 0.02984 *
## sexFemale 1.476326 0.167284 8.825 < 2e-16 ***
## exerciseNo 1.025123 0.211754 4.841 1.32e-06 ***
## age_group25-29 -0.905311 0.522321 -1.733 0.08309 .
## age_group30-34 -0.982383 0.507208 -1.937 0.05280 .
## age_group35-39 -1.596608 0.486392 -3.283 0.00103 **
## age_group40-44 -2.766824 0.483553 -5.722 1.09e-08 ***
## age_group45-49 -3.013299 0.495926 -6.076 1.29e-09 ***
## age_group50-54 -3.401010 0.479874 -7.087 1.48e-12 ***
## age_group55-59 -4.380244 0.473009 -9.260 < 2e-16 ***
## age_group60-64 -4.306683 0.457238 -9.419 < 2e-16 ***
## age_group65-69 -5.156026 0.446969 -11.536 < 2e-16 ***
## age_group70-74 -5.776319 0.454363 -12.713 < 2e-16 ***
## age_group75-79 -6.073508 0.471096 -12.892 < 2e-16 ***
## age_group80+ -6.660143 0.476501 -13.977 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.447 on 7983 degrees of freedom
## Multiple R-squared: 0.1626, Adjusted R-squared: 0.161
## F-statistic: 96.91 on 16 and 7983 DF, p-value: < 2.2e-16
#Interpretation:In the multiple linear regression model, several factors were independently associated with mentally unhealthy days. More physically unhealthy days were strongly associated with more mentally unhealthy days. Females reported more mentally unhealthy days than males, and individuals who did not exercise reported worse mental health than those who did. Higher BMI was associated with slightly more mentally unhealthy days, although the effect was small. Older age groups reported fewer mentally unhealthy days compared to younger adults.
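#Fitted regression equation: menthlth_days = 5.063 + 0.294(physhlth_days) + 0.029(bmi) + 1.476(Female) + 1.025(No exercise) − 0.905(Age 25–29) − 0.982(Age 30–34) − 1.597(Age 35–39) − 2.767(Age 40–44) − 3.013(Age 45–49) − 3.401(Age 50–54) − 4.380(Age 55–59) − 4.307(Age 60–64) − 5.156(Age 65–69) − 5.776(Age 70–74) − 6.074(Age 75–79) − 6.660(Age 80+)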
#Reference groups: Male, exercised (Yes), age 18–24.
#physhlth_days: Each additional physically unhealthy day is associated with an increase of 0.294 mentally unhealthy days, holding all other variables constant. #bmi: Each 1-unit increase in BMI is associated with an increase of 0.029 mentally unhealthy days, holding other variables constant. This effect is small. #Sex (Female vs Male): Females report 1.476 more mentally unhealthy days on average compared to males, holding other variables constant. #Exercise (No vs Yes reference): Individuals who do not exercise report 1.025 more mentally unhealthy days on average compared to those who exercise, holding other variables constant. #Age group (example: 40–44 vs 18–24): Adults aged 40–44 report 2.767 fewer mentally unhealthy days compared to those aged 18–24, holding other variables constant. #Income (not included in Model 1; estimates are taken from the fully adjusted Model A below): $50,000–$100,000 vs <$15,000: Individuals in this income group report about 3.08 fewer mentally unhealthy days compared to those earning less than $15,000, holding other variables constant. ≥$200,000 vs <$15,000: Individuals in the highest income group report substantially fewer mentally unhealthy days (about 3.86 fewer) compared to the lowest income group, holding other variables constant.
#R-squared: The R-squared is 0.163, meaning that about 16.3% of the variation in mentally unhealthy days is explained by the predictors in the model. #Adjusted R-squared: The adjusted R-squared is 0.161, which is slightly lower than the R-squared because it accounts for the number of predictors in the model. This suggests that the included variables contribute meaningful information and that the model is not overfitting. #Root MSE (Residual Standard Error): The root MSE is 7.447, meaning that the model’s predictions differ from the observed number of mentally unhealthy days by about 7.45 days on average. #Overall F-test: The null hypothesis (H₀) is that all regression coefficients (except the intercept) are equal to zero, meaning none of the predictors are associated with mentally unhealthy days. The F-statistic is 96.91 with 16 and 7983 degrees of freedom, and the p-value is < 0.001. Since the p-value is very small, we reject the null hypothesis and conclude that the model as a whole is statistically significant, meaning at least one predictor is associated with mentally unhealthy days.
## Anova Table (Type III tests)
##
## Response: menthlth_days
## Sum Sq Df F value Pr(>F)
## (Intercept) 5442 1 98.1191 < 2.2e-16 ***
## physhlth_days 48599 1 876.2602 < 2.2e-16 ***
## bmi 262 1 4.7201 0.02984 *
## sex 4320 1 77.8858 < 2.2e-16 ***
## exercise 1300 1 23.4363 1.315e-06 ***
## age_group 29864 12 44.8713 < 2.2e-16 ***
## Residuals 442748 7983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#type III sums of squares were used to test the independent association of each predictor with mentally unhealthy days. At α = 0.05, all predictors were statistically significant, including physically unhealthy days, BMI, sex, exercise, and age group (all p < 0.05). This means that each variable is independently associated with mentally unhealthy days after adjusting for the others. Age group was also significant overall, indicating differences between at least one age category and the reference group.
# Chunk test for income: compare the model with every predictor against the
# same model without the income terms.
model_full <- lm(
  formula = menthlth_days ~ physhlth_days + bmi + sex + exercise +
    age_group + income + education + smoking,
  data = brfss_analytic
)
model_reduced <- lm(
  formula = menthlth_days ~ physhlth_days + bmi + sex + exercise +
    age_group + education + smoking,
  data = brfss_analytic
)
# Partial F-test of the reduced model against the full model.
anova(model_reduced, model_full) ## Analysis of Variance Table
##
## Model 1: menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group +
## education + smoking
## Model 2: menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group +
## income + education + smoking
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7976 435484
## 2 7970 430750 6 4733.9 14.598 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#A chunk test was conducted to assess whether income collectively improves the model after adjusting for other predictors. H₀: All income coefficients = 0 (income does not improve the model). Hₐ: At least one income coefficient ≠ 0 (income improves the model). The comparison of the reduced and full models resulted in an F-statistic of 14.60 with 6 and 7970 degrees of freedom, and a p-value of < 0.001. Since the p-value is less than 0.05, we reject the null hypothesis and conclude that income significantly improves the model and is an important predictor of mentally unhealthy days.
# Chunk test for education: fit the model without the education terms and
# compare it against the full model.
model_reduced_edu <- lm(
  menthlth_days ~ physhlth_days + bmi + sex + exercise +
    age_group + income + smoking,
  data = brfss_analytic
)
# model_full was already fitted for the income chunk test above with the same
# formula and data, so it is reused here instead of being refitted.
anova(model_reduced_edu, model_full) ## Analysis of Variance Table
##
## Model 1: menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group +
## income + smoking
## Model 2: menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group +
## income + education + smoking
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 7974 432015
## 2 7970 430750 4 1265.2 5.8521 0.0001064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#A chunk test was conducted to assess whether education collectively improves the model after adjusting for other predictors. H₀: All education coefficients = 0 (education does not improve the model) Hₐ: At least one education coefficient ≠ 0 (education improves the model). The F-statistic was 5.85 with 4 and 7970 degrees of freedom, and the p-value was 0.0001. Since the p-value is less than 0.05, we reject the null hypothesis and conclude that education significantly improves the model and is an important predictor of mentally unhealthy days.
#The Type III results show that all predictors, including physical health, BMI, sex, exercise, and age group, are significantly associated with mentally unhealthy days. Physically unhealthy days had the strongest effect, followed by age group and sex. The chunk tests showed that both income and education significantly improve the model when added as groups of variables. This is important because chunk tests look at the overall effect of a variable with multiple categories, not just individual coefficients. Overall, both health and socioeconomic factors are important for mental health.
# Models A and B use a binary current-smoking indicator that must exist in
# brfss_analytic before they are fitted. It is derived from the four-level
# `smoking` factor: daily and some-day smokers are "Current smoker", former
# and never smokers are "Non-smoker" (the reference level). The guard leaves
# an already existing smoker_current column untouched.
if (!"smoker_current" %in% names(brfss_analytic)) {
  brfss_analytic <- brfss_analytic %>%
    mutate(
      smoker_current = factor(
        if_else(
          smoking %in% c("Current daily smoker", "Current some-day smoker"),
          "Current smoker",
          "Non-smoker"
        ),
        levels = c("Non-smoker", "Current smoker")
      )
    )
}
# Model A: main-effects model with current smoking and all other covariates.
model_A <- lm(
  menthlth_days ~ physhlth_days + bmi + sex + smoker_current +
    exercise + age_group + income + education,
  data = brfss_analytic
)
summary(model_A) ##
## Call:
## lm(formula = menthlth_days ~ physhlth_days + bmi + sex + smoker_current +
## exercise + age_group + income + education, data = brfss_analytic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.2913 -3.8732 -1.6219 0.6681 30.4937
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.08285 0.90812 6.698 2.25e-11
## physhlth_days 0.26862 0.01007 26.673 < 2e-16
## bmi 0.03341 0.01323 2.525 0.011589
## sexFemale 1.33309 0.16732 7.967 1.84e-15
## smoker_currentCurrent smoker 2.12874 0.27121 7.849 4.74e-15
## exerciseNo 0.67248 0.21503 3.127 0.001770
## age_group25-29 -0.91492 0.51978 -1.760 0.078412
## age_group30-34 -0.88226 0.50606 -1.743 0.081301
## age_group35-39 -1.58102 0.48765 -3.242 0.001191
## age_group40-44 -2.61572 0.48577 -5.385 7.46e-08
## age_group45-49 -2.82462 0.49698 -5.684 1.37e-08
## age_group50-54 -3.26004 0.48206 -6.763 1.45e-11
## age_group55-59 -4.23010 0.47413 -8.922 < 2e-16
## age_group60-64 -4.24840 0.45682 -9.300 < 2e-16
## age_group65-69 -5.23383 0.44671 -11.716 < 2e-16
## age_group70-74 -5.70233 0.45453 -12.546 < 2e-16
## age_group75-79 -5.89774 0.47031 -12.540 < 2e-16
## age_group80+ -6.48879 0.47372 -13.697 < 2e-16
## income15000 to less than 25000 -1.67974 0.46981 -3.575 0.000352
## income25000 to less than 35000 -2.10230 0.44863 -4.686 2.83e-06
## income35000 to less than 50000 -2.58691 0.43898 -5.893 3.95e-09
## income50000 to less than 100000 -3.08227 0.41140 -7.492 7.50e-14
## income100000 to less than 200000 -3.53598 0.43000 -8.223 2.30e-16
## income200000 or more -3.86251 0.50112 -7.708 1.43e-14
## educationHigh school diploma or GED 0.21386 0.81186 0.263 0.792237
## educationSome college or technical school 1.19653 0.69073 1.732 0.083264
## educationCollege graduate 1.90349 0.69219 2.750 0.005974
## educationGraduate or professional degree 1.74564 0.69382 2.516 0.011890
##
## (Intercept) ***
## physhlth_days ***
## bmi *
## sexFemale ***
## smoker_currentCurrent smoker ***
## exerciseNo **
## age_group25-29 .
## age_group30-34 .
## age_group35-39 **
## age_group40-44 ***
## age_group45-49 ***
## age_group50-54 ***
## age_group55-59 ***
## age_group60-64 ***
## age_group65-69 ***
## age_group70-74 ***
## age_group75-79 ***
## age_group80+ ***
## income15000 to less than 25000 ***
## income25000 to less than 35000 ***
## income35000 to less than 50000 ***
## income50000 to less than 100000 ***
## income100000 to less than 200000 ***
## income200000 or more ***
## educationHigh school diploma or GED
## educationSome college or technical school .
## educationCollege graduate **
## educationGraduate or professional degree *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.366 on 7972 degrees of freedom
## Multiple R-squared: 0.182, Adjusted R-squared: 0.1792
## F-statistic: 65.7 on 27 and 7972 DF, p-value: < 2.2e-16
# Model B: same covariates as Model A, plus the sex-by-current-smoking
# interaction (`sex * smoker_current` expands to both main effects and their
# product term).
model_B <- lm(
  formula = menthlth_days ~ physhlth_days + bmi + sex * smoker_current +
    exercise + age_group + income + education,
  data = brfss_analytic
)
# Coefficient table, including the interaction term.
summary(model_B) ##
## Call:
## lm(formula = menthlth_days ~ physhlth_days + bmi + sex * smoker_current +
## exercise + age_group + income + education, data = brfss_analytic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.337 -3.837 -1.604 0.628 30.426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.22653 0.90967 6.845 8.23e-12
## physhlth_days 0.26859 0.01007 26.679 < 2e-16
## bmi 0.03309 0.01323 2.502 0.012380
## sexFemale 1.18553 0.17752 6.678 2.58e-11
## smoker_currentCurrent smoker 1.52079 0.36539 4.162 3.19e-05
## exerciseNo 0.67285 0.21496 3.130 0.001753
## age_group25-29 -0.92018 0.51962 -1.771 0.076616
## age_group30-34 -0.89242 0.50591 -1.764 0.077774
## age_group35-39 -1.59287 0.48752 -3.267 0.001090
## age_group40-44 -2.62864 0.48564 -5.413 6.39e-08
## age_group45-49 -2.84255 0.49687 -5.721 1.10e-08
## age_group50-54 -3.27784 0.48195 -6.801 1.11e-11
## age_group55-59 -4.24987 0.47404 -8.965 < 2e-16
## age_group60-64 -4.26404 0.45671 -9.336 < 2e-16
## age_group65-69 -5.25062 0.44662 -11.756 < 2e-16
## age_group70-74 -5.71106 0.45439 -12.569 < 2e-16
## age_group75-79 -5.90758 0.47018 -12.565 < 2e-16
## age_group80+ -6.49946 0.47359 -13.724 < 2e-16
## income15000 to less than 25000 -1.63574 0.46999 -3.480 0.000503
## income25000 to less than 35000 -2.07457 0.44862 -4.624 3.82e-06
## income35000 to less than 50000 -2.54551 0.43915 -5.796 7.03e-09
## income50000 to less than 100000 -3.04298 0.41157 -7.394 1.57e-13
## income100000 to less than 200000 -3.50972 0.42999 -8.162 3.79e-16
## income200000 or more -3.84047 0.50103 -7.665 2.00e-14
## educationHigh school diploma or GED 0.12556 0.81238 0.155 0.877177
## educationSome college or technical school 1.11789 0.69123 1.617 0.105865
## educationCollege graduate 1.81788 0.69283 2.624 0.008711
## educationGraduate or professional degree 1.66905 0.69428 2.404 0.016240
## sexFemale:smoker_currentCurrent smoker 1.28327 0.51705 2.482 0.013089
##
## (Intercept) ***
## physhlth_days ***
## bmi *
## sexFemale ***
## smoker_currentCurrent smoker ***
## exerciseNo **
## age_group25-29 .
## age_group30-34 .
## age_group35-39 **
## age_group40-44 ***
## age_group45-49 ***
## age_group50-54 ***
## age_group55-59 ***
## age_group60-64 ***
## age_group65-69 ***
## age_group70-74 ***
## age_group75-79 ***
## age_group80+ ***
## income15000 to less than 25000 ***
## income25000 to less than 35000 ***
## income35000 to less than 50000 ***
## income50000 to less than 100000 ***
## income100000 to less than 200000 ***
## income200000 or more ***
## educationHigh school diploma or GED
## educationSome college or technical school
## educationCollege graduate **
## educationGraduate or professional degree *
## sexFemale:smoker_currentCurrent smoker *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.363 on 7971 degrees of freedom
## Multiple R-squared: 0.1826, Adjusted R-squared: 0.1798
## F-statistic: 63.61 on 28 and 7971 DF, p-value: < 2.2e-16
#In Model A, current smokers reported more mentally unhealthy days compared to non-smokers. In Model B, the interaction between sex and smoking was statistically significant (p = 0.013), indicating that the association differs by sex. Among males, current smokers had about 1.52 more mentally unhealthy days than non-smokers, while among females the effect was stronger, with about 2.80 more days. This suggests that smoking has a greater negative impact on mental health among females than males.
#A model comparison was conducted to test whether the interaction between sex and current smoking improves the model. H₀: The interaction term = 0 (no interaction; effect of smoking is the same for both sexes). Hₐ: The interaction term ≠ 0 (the effect of smoking differs by sex). The F-statistic was approximately 6.16 with 1 and 7971 degrees of freedom, and the p-value was 0.013. Since the p-value is less than 0.05, we reject the null hypothesis and conclude that the interaction between sex and smoking is statistically significant
#Effect among men (reference group): Among men, current smokers report about 1.52 more mentally unhealthy days compared to non-smokers, holding other variables constant. #Effect among women: Among women, the effect is 1.52 + 1.28 = 2.80 more mentally unhealthy days compared to non-smokers, holding other variables constant. #These results show that smoking is associated with worse mental health in both sexes, but the association is stronger in women. The interaction coefficient means that the difference between current smokers and non-smokers is about 1.28 days larger among women than among men. Overall, this suggests that smoking has a greater negative impact on mental health among women than men.
# Model-based predictions of mentally unhealthy days from Model B for each
# combination of smoking status and sex.
pred <- ggpredict(model_B, terms = c("smoker_current", "sex"))
# Interaction plot. `group = group` makes geom_line() connect the smoking
# categories within each sex even when x is discrete; without it ggplot2 can
# treat every x/colour combination as its own single-point group and draw no
# lines. `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
ggplot(pred, aes(x = x, y = predicted, color = group, group = group)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    x = "Smoking status",
    y = "Predicted mentally unhealthy days",
    color = "Sex",
    title = "Predicted Mentally Unhealthy Days by Smoking Status and Sex"
  ) +
  theme_minimal()
#The results suggest that smoking is associated with worse mental health, as people who currently smoke report more mentally unhealthy days than those who do not. This pattern is seen in both men and women, but the negative impact appears to be stronger among women. In other words, female smokers experience a greater burden of poor mental health compared to male smokers. These findings highlight the importance of considering sex differences when designing smoking cessation and mental health interventions.
#The results suggest that both health-related and socioeconomic factors are important determinants of mental health among U.S. adults. Physically unhealthy days showed the strongest association with mentally unhealthy days, followed by smoking, sex, age, and income, indicating that both physical health and social factors play key roles. BMI had a small association, and some education categories were not statistically significant, suggesting weaker or inconsistent effects. Because the data are cross-sectional, we cannot determine causality or the direction of relationships, and the results may be affected by self-report bias. Potential confounders that could bias these associations include underlying mental health conditions (such as depression) and access to healthcare, as well as factors like stress or social support.
#Adjusted R-squared accounts for the number of predictors in the model, unlike regular R-squared which always increases when more variables are added. This is important when adding multiple categorical predictors, because each category adds several dummy variables that can inflate R-squared even if they do not meaningfully improve the model. Adjusted R-squared helps determine whether the added variables actually improve model fit after accounting for model complexity. #Chunk tests are useful because they evaluate the overall contribution of a group of related variables, such as income or education, rather than testing each category separately. Individual t-tests may miss the overall importance of a variable if no single category is strongly significant, while the group as a whole may still significantly improve the fit of the model.
#I did not use AI for this assignment
#End of Lab Activity