Libraries used
library(readxl) ## For reading Excel files in R
library(dplyr) ## Mutating variables + more
library(kableExtra) ## For making tables
library(ggplot2) ## For making plots
library(tidyverse) ## For making tables
library(lme4) ## For fitting LMM
library(lmerTest)
library(cowplot) ## For making plots to check LMM assumptions
Data cleaning
## Load data
d <- read_xlsx("Participants_Aggregated_Zipcode2.xlsx")
## Data cleaning
#Data needs cleaning -> many variables are the wrong type and some need to be converted
# Commas to periods & convert to numeric for all continuous variables
chrtonum <- c("SBP_T1", "DBP_T1", "HTN_MED_T1", "CHO_T1",
"BMI_T1", "PACKYEARS", "ALCOHOL_INTAKE_T1",
"MWK_VAL", "NSES", "METABOLIC_DISORDER_T1", "MENTAL_DISORDER_T1")
d <- d %>%
mutate_at(vars(all_of(chrtonum)), ~ as.numeric(gsub(",", ".", .)))
# Convert categorical variable to factor part 1
municipalities <- c("Appingedam", "Delfzijl", "Groningen", "Loppersum", "Stadskanaal", "Veendam",
"Pekela", "Oldambt", "Westerwolde", "Midden-Groningen", "Het Hogeland",
"Westerkwartier", "Achtkarspelen", "De Wolden", "Ameland", "Harlingen", "Heerenveen",
"Leeuwarden", "Ooststellingwerf", "Opsterland", "Schiermonnikoog",
"Smallingerland", "Terschelling", "Vlieland", "Weststellingwerf",
"Tytsjerksteradiel", "Dantumadiel", "Súdwest-Fryslân", "De Fryske Marren",
"Waadhoeke", "Noardeast-Fryslân", "Assen", "Coevorden", "Emmen", "Hoogeveen",
"Meppel", "Aa en Hunze", "Borger-Odoorn", "Noordenveld",
"Westerveld", "Tynaarlo", "Midden-Drenthe")
d$MUNICIPALITY <- factor(d$MUNICIPALITY, levels = municipalities)
# Convert categorical variable to factor part 2
d <- d %>%
mutate(GROUP_SIZE_CAT = factor(GROUP_SIZE_CAT,
levels = c(1, 2, 3, 4, 5, 6, 7),
labels = c("1", "2", "3", "4", "5", "6", "7")))
# Creating new variable out of GROUP_SIZE_CAT to account for actual group size
d <- d %>%
mutate(GROUP_SIZE = case_when(
GROUP_SIZE_CAT == 1 ~ 100,
GROUP_SIZE_CAT == 2 ~ 250,
GROUP_SIZE_CAT == 3 ~ 350,
GROUP_SIZE_CAT == 4 ~ 450,
GROUP_SIZE_CAT == 5 ~ 550,
GROUP_SIZE_CAT == 6 ~ 650,
GROUP_SIZE_CAT == 7 ~ 750,
TRUE ~ NA_real_))
# Convert categorical variable to factor part 3
d$ZIPCODE <- factor(as.character(d$ZIPCODE), levels = unique(as.character(d$ZIPCODE)))
# Let's check the data again
str(d)
## tibble [534 × 18] (S3: tbl_df/tbl/data.frame)
## $ MUNICIPALITY : Factor w/ 42 levels "Appingedam","Delfzijl",..: 33 33 33 33 33 33 34 34 34 34 ...
## $ ZIPCODE : Factor w/ 293 levels "7741","7742",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ From : num [1:534] 7740 7740 7740 7740 7740 7740 7760 7760 7760 7760 ...
## $ To : num [1:534] 7756 7756 7756 7756 7756 ...
## $ PROVINCE : chr [1:534] "Drenthe" "Drenthe" "Drenthe" "Drenthe" ...
## $ GROUP_SIZE_CAT : Factor w/ 7 levels "1","2","3","4",..: 2 3 1 2 1 2 1 2 1 1 ...
## $ BMI_T1 : num [1:534] 26.2 25.9 26.7 26 26.1 ...
## $ DBP_T1 : num [1:534] 78.3 73.3 77.4 72.5 77.3 ...
## $ SBP_T1 : num [1:534] 132 125 133 123 132 ...
## $ HTN_MED_T1 : num [1:534] 17.8 11.4 11.5 10.7 13.5 ...
## $ CHO_T1 : num [1:534] 5.23 5.1 5.2 4.94 5.17 5.19 5.14 4.87 5.08 4.9 ...
## $ PACKYEARS : num [1:534] 8.11 6.74 7.74 4.49 5.73 5.78 6.83 4.67 8.74 6.78 ...
## $ METABOLIC_DISORDER_T1: num [1:534] 3.03 2.34 3.11 3.38 3.06 1.72 4.97 3.57 5.62 2.44 ...
## $ ALCOHOL_INTAKE_T1 : num [1:534] 11.01 5 10.62 4.71 9.89 ...
## $ MWK_VAL : num [1:534] 609 447 506 453 337 ...
## $ NSES : num [1:534] -1.35 -1.35 -1.19 -1.2 -0.22 -0.23 -0.1 -0.11 -0.84 -0.83 ...
## $ MENTAL_DISORDER_T1 : num [1:534] 1.14 2.51 1.69 1.96 0.76 1.34 1.14 1.47 0.17 3.38 ...
## $ GROUP_SIZE : num [1:534] 250 350 100 250 100 250 100 250 100 100 ...
#Looks better!
#Variables 'From', 'To', 'PROVINCE', and 'GROUP_SIZE_CAT' will not be used for this analysis
# Check for missing values:
colSums(is.na(d))
## MUNICIPALITY ZIPCODE From
## 0 0 0
## To PROVINCE GROUP_SIZE_CAT
## 0 0 0
## BMI_T1 DBP_T1 SBP_T1
## 0 0 0
## HTN_MED_T1 CHO_T1 PACKYEARS
## 0 0 0
## METABOLIC_DISORDER_T1 ALCOHOL_INTAKE_T1 MWK_VAL
## 0 0 0
## NSES MENTAL_DISORDER_T1 GROUP_SIZE
## 0 0 0
#No missing values
Descriptive statistics
## Descriptives
#calculating zipcodes per municipality
zipcode_counts <- d %>%
group_by(MUNICIPALITY) %>%
summarize(zipcodes_per_municipality = n())
#Making table with all variables + descriptive stats
summary_stats <- d %>%
summarize(
`Number of zipcodes` = (nrow(d)),
`Number of municipalities` = n_distinct(MUNICIPALITY),
`Median number of zipcodes per municipality` = median(zipcode_counts$zipcodes_per_municipality),
`Median group size` = median(GROUP_SIZE),
`Mean Mental Disorder (%)` = mean(MENTAL_DISORDER_T1),
`Mean SBP (mmHg)` = mean(SBP_T1),
`Mean DBP (mmHg)` = mean(DBP_T1),
`Mean HTN Medication Usage (%)` = mean(HTN_MED_T1),
`Mean Cholesterol (mmol/L)` = mean(CHO_T1),
`Mean BMI` = mean(BMI_T1),
`Mean Packyears` = mean(PACKYEARS),
`Mean Alcohol Intake (g)` = mean(ALCOHOL_INTAKE_T1),
`Mean Weekly Physical Activity (minutes)` = mean(MWK_VAL),
`Mean Metabolic Disorder (%)` = mean(METABOLIC_DISORDER_T1),
`Mean NSES (%)` = mean(NSES),
) %>%
pivot_longer(
cols = everything(),
names_to = "Statistic",
values_to = "Value"
)
summary_stats %>%
kbl(caption = "Table of all variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Table of all variables
|
Statistic
|
Value
|
|
Number of zipcodes
|
534.0000000
|
|
Number of municipalities
|
37.0000000
|
|
Median number of zipcodes per municipality
|
12.0000000
|
|
Median group size
|
100.0000000
|
|
Mean Mental Disorder (%)
|
1.6171161
|
|
Mean SBP (mmHg)
|
125.2363296
|
|
Mean DBP (mmHg)
|
73.4533895
|
|
Mean HTN Medication Usage (%)
|
12.2505993
|
|
Mean Cholesterol (mmol/L)
|
5.0573596
|
|
Mean BMI
|
25.7698502
|
|
Mean Packyears
|
6.0744007
|
|
Mean Alcohol Intake (g)
|
7.4196629
|
|
Mean Weekly Physical Activity (minutes)
|
502.4408614
|
|
Mean Metabolic Disorder (%)
|
2.9451873
|
|
Mean NSES (%)
|
-0.7029401
|
#Making separate table for MUNICIPALITY to show how many zipcodes are included per municipality
#Municipalities with no zipcodes at all are not taken into account in the analysis
municipality_counts <- table(d$MUNICIPALITY)
municipality_df <- as.data.frame(municipality_counts)
colnames(municipality_df) <- c("Municipality", "Count")
kbl(municipality_df, caption = "Count of zipcodes by municipality") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Count of zipcodes by municipality
|
Municipality
|
Count
|
|
Appingedam
|
6
|
|
Delfzijl
|
7
|
|
Groningen
|
56
|
|
Loppersum
|
6
|
|
Stadskanaal
|
14
|
|
Veendam
|
12
|
|
Pekela
|
4
|
|
Oldambt
|
22
|
|
Westerwolde
|
4
|
|
Midden-Groningen
|
15
|
|
Het Hogeland
|
16
|
|
Westerkwartier
|
16
|
|
Achtkarspelen
|
12
|
|
De Wolden
|
3
|
|
Ameland
|
0
|
|
Harlingen
|
4
|
|
Heerenveen
|
16
|
|
Leeuwarden
|
44
|
|
Ooststellingwerf
|
0
|
|
Opsterland
|
14
|
|
Schiermonnikoog
|
0
|
|
Smallingerland
|
25
|
|
Terschelling
|
3
|
|
Vlieland
|
0
|
|
Weststellingwerf
|
15
|
|
Tytsjerksteradiel
|
17
|
|
Dantumadiel
|
6
|
|
Súdwest-Fryslân
|
34
|
|
De Fryske Marren
|
12
|
|
Waadhoeke
|
10
|
|
Noardeast-Fryslân
|
8
|
|
Assen
|
0
|
|
Coevorden
|
6
|
|
Emmen
|
45
|
|
Hoogeveen
|
18
|
|
Meppel
|
6
|
|
Aa en Hunze
|
6
|
|
Borger-Odoorn
|
11
|
|
Noordenveld
|
7
|
|
Westerveld
|
1
|
|
Tynaarlo
|
9
|
|
Midden-Drenthe
|
24
|
#Scatterplots to examine the basic relationships between mental health disorders and cardiovascular risk factors
ggplot(d, aes(x = SBP_T1, y = MENTAL_DISORDER_T1)) +
geom_point() +
theme_minimal() +
xlab("SBP (mmHg)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = DBP_T1, y = MENTAL_DISORDER_T1)) +
geom_point() +
theme_minimal() +
xlab("DBP (mmHg)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = HTN_MED_T1, y = MENTAL_DISORDER_T1)) +
geom_point() +
theme_minimal() +
xlab("HTN Medication Usage (%)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = CHO_T1, y = MENTAL_DISORDER_T1)) +
geom_point() +
theme_minimal() +
xlab("Cholesterol (mmol/L)") + ylab("Mental disorders (%)")

Fitting LMM 1
## Fitting the LMMs
#Two Models will be fitted initially
#Model 1 with mental illness as the dependent and CVD risk factors as the independent variables
#Model 2 with lifestyle-related factors and socio-demographic variables as confounders
#However, some of the variables are in different scales. We will first standardize the data by calculating z-scores
#function scale() does this for us
d <- d %>%
mutate(
SBP_T1_scaled = scale(SBP_T1),
DBP_T1_scaled = scale(DBP_T1),
HTN_MED_T1_scaled = scale(HTN_MED_T1),
CHO_T1_scaled = scale(CHO_T1),
BMI_T1_scaled = scale(BMI_T1),
PACKYEARS_scaled = scale(PACKYEARS),
METABOLIC_DISORDER_T1_scaled = scale(METABOLIC_DISORDER_T1),
ALCOHOL_INTAKE_T1_scaled = scale(ALCOHOL_INTAKE_T1),
MWK_VAL_scaled = scale(MWK_VAL),
NSES_scaled = scale(NSES),
GROUP_SIZE_scaled = scale(GROUP_SIZE)
)
# Model 1
m1 <- lmer(MENTAL_DISORDER_T1~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + (1 | MUNICIPALITY), data = d)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +
## CHO_T1_scaled + (1 | MUNICIPALITY)
## Data: d
##
## REML criterion at convergence: 1122.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2641 -0.6883 -0.0669 0.6549 4.3969
##
## Random effects:
## Groups Name Variance Std.Dev.
## MUNICIPALITY (Intercept) 0.07143 0.2673
## Residual 0.43097 0.6565
## Number of obs: 534, groups: MUNICIPALITY, 37
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.58956 0.05607 30.42917 28.351 < 2e-16 ***
## SBP_T1_scaled -0.08634 0.08806 527.64758 -0.981 0.327277
## DBP_T1_scaled -0.31929 0.08678 528.66293 -3.679 0.000258 ***
## HTN_MED_T1_scaled 0.22217 0.03409 511.58303 6.518 1.71e-10 ***
## CHO_T1_scaled 0.03849 0.03216 528.95096 1.197 0.231846
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SBP_T1 DBP_T1 HTN_ME
## SBP_T1_scld 0.027
## DBP_T1_scld -0.035 -0.923
## HTN_MED_T1_ -0.072 -0.140 0.088
## CHO_T1_scld -0.009 -0.078 -0.025 -0.156
#The variance of the residual is high compared to that of MUNICIPALITY
#This could mean that there are other factors that could explain this variance
#For the fixed effects, SBP_T1 and CHO_T1 are not significant.
#DBP seems to have an inverse relationship with MH -> coefficient of -0.31929
#HTN_MED has a positive relationship -> coefficient of 0.22217
# Testing random effect
ranova(m1)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + (1 | MUNICIPALITY)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -561.47 1137.0
## (1 | MUNICIPALITY) 6 -580.68 1173.3 38.41 1 5.735e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Highly significant, indicating there is a difference between municipalities
# Residuals plot
plot(m1)

#Data has outliers but overall the assumptions of the model are fulfilled, points are clustered around 0
# More plots
d$resid2 <- resid(m1)
d$fitted2 <- fitted(m1)
rr <- ranef(m1)$MUNICIPALITY[,1]
drr <- data.frame(rr=rr)
yl <- ylab("residuals")
yl0 <- ylab("")
gr <- background_grid(major = "xy", minor = "xy")
p1 <- ggplot(d, aes(x=fitted2, y=resid2)) + geom_point() + yl + gr
p2 <- ggplot(d, aes(x= DBP_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p3 <- ggplot(d, aes(x= HTN_MED_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p4 <- ggplot(d, aes(sample=resid2)) + geom_qq () + geom_qq_line() + yl + gr
p5 <- ggplot(drr,aes(sample=rr)) + geom_qq () + geom_qq_line() + yl0 + gr
cowplot::plot_grid(p1,p2,p3,p4,p5, align = "h", ncol = 3)

#Looks fine!
# Making forest plot to visualize the results:
#Extracting the coefficients
m1coef <- summary(m1)$coefficients %>%
as.data.frame() %>%
mutate(
Variable = rownames(.),
Estimate = Estimate,
CI_lower = Estimate - 1.96 * `Std. Error`,
CI_upper = Estimate + 1.96 * `Std. Error`
) %>%
filter(Variable != "(Intercept)" & !Variable %in% c("SBP_T1_scaled", "CHO_T1_scaled")) %>%
mutate(
Variable = recode(Variable, "HTN_MED_T1_scaled" = "Hypertension Medication Usage",
"DBP_T1_scaled" = "Diastolic Blood Pressure"))
#Making the plot:
ggplot(m1coef, aes(x = Estimate, y = Variable)) +
geom_point(color = "blue", size = 3) +
geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.1, color = "blue") +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
labs(
title = "Forest Plot of Standardized Significant Fixed Effects for M1",
x = "Estimate (beta coefficient)",
y = "Variable"
) +
theme_minimal() +
theme(legend.position = "none")

Fitting LMM 2
# Model 2
m2 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (1 | MUNICIPALITY), data = d)
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +
## CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +
## ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +
## GROUP_SIZE_scaled + (1 | MUNICIPALITY)
## Data: d
##
## REML criterion at convergence: 1089.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1709 -0.6426 -0.0708 0.6198 4.3919
##
## Random effects:
## Groups Name Variance Std.Dev.
## MUNICIPALITY (Intercept) 0.0239 0.1546
## Residual 0.3987 0.6314
## Number of obs: 534, groups: MUNICIPALITY, 37
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.58197 0.04039 33.16757 39.164 < 2e-16
## SBP_T1_scaled -0.08278 0.09076 459.96939 -0.912 0.362177
## DBP_T1_scaled -0.22620 0.08647 510.25091 -2.616 0.009163
## HTN_MED_T1_scaled 0.10935 0.03761 477.79638 2.908 0.003809
## CHO_T1_scaled 0.04945 0.03160 519.69310 1.565 0.118255
## BMI_T1_scaled 0.11316 0.04658 473.41623 2.429 0.015503
## PACKYEARS_scaled 0.05995 0.04460 521.34447 1.344 0.179460
## METABOLIC_DISORDER_T1_scaled -0.03285 0.03449 515.33426 -0.952 0.341371
## ALCOHOL_INTAKE_T1_scaled -0.18015 0.04826 475.16061 -3.733 0.000212
## MWK_VAL_scaled -0.02976 0.03410 500.87144 -0.873 0.383242
## NSES_scaled -0.14334 0.03132 520.52911 -4.577 5.89e-06
## GROUP_SIZE_scaled 0.05902 0.02918 519.32017 2.022 0.043644
##
## (Intercept) ***
## SBP_T1_scaled
## DBP_T1_scaled **
## HTN_MED_T1_scaled **
## CHO_T1_scaled
## BMI_T1_scaled *
## PACKYEARS_scaled
## METABOLIC_DISORDER_T1_scaled
## ALCOHOL_INTAKE_T1_scaled ***
## MWK_VAL_scaled
## NSES_scaled ***
## GROUP_SIZE_scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld 0.006
## DBP_T1_scld -0.037 -0.794
## HTN_MED_T1_ -0.052 -0.051 0.052
## CHO_T1_scld -0.006 -0.135 0.040 -0.159
## BMI_T1_scld -0.006 -0.104 -0.204 -0.187 -0.096
## PACKYEARS_s 0.009 -0.071 -0.071 -0.180 0.013 -0.138
## METABOLIC_D 0.015 -0.085 0.065 -0.309 0.085 -0.095 -0.156
## ALCOHOL_INT 0.057 -0.160 -0.179 0.195 -0.036 0.359 -0.354 -0.026
## MWK_VAL_scl -0.022 -0.213 0.138 0.043 0.157 0.009 -0.087 -0.008 -0.196
## NSES_scaled -0.015 0.153 -0.195 0.039 -0.187 0.153 0.151 0.081 -0.089
## GROUP_SIZE_ 0.013 0.020 -0.033 -0.002 0.049 0.017 -0.056 0.002 0.172
## MWK_VA NSES_s
## SBP_T1_scld
## DBP_T1_scld
## HTN_MED_T1_
## CHO_T1_scld
## BMI_T1_scld
## PACKYEARS_s
## METABOLIC_D
## ALCOHOL_INT
## MWK_VAL_scl
## NSES_scaled 0.006
## GROUP_SIZE_ -0.083 -0.051
#In this model, even less of the variance is explained by the random MUNICIPALITY intercept
#For the fixed effects, SBP_T1 and CHO_T1 are still not significant.
#The coefficients are lower for both DBP_T1 (B = -0.22620) and HTN_MED_T1 (B = 0.10935) than in M1
#Some of the control variables also are significant: GROUP_SIZE, NSES, ALCOHOL_INTAKE_T1, and BMI_T1
#AIC is also slightly better, indicating better model fit: 1136 (m1) vs 1117 (m2)
# Testing random effect again
ranova(m2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (1 | MUNICIPALITY)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 14 -544.95 1117.9
## (1 | MUNICIPALITY) 13 -550.98 1128.0 12.061 1 0.0005148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Again highly significant, indicating there is a difference between municipalities
# Residuals plot
plot(m2)

#Again overall the assumptions of the model are fulfilled, points are clustered around 0
# More plots v2
d$resid2 <- resid(m2)
d$fitted2 <- fitted(m2)
rr <- ranef(m1)$MUNICIPALITY[,1]
drr <- data.frame(rr=rr)
yl <- ylab("residuals")
yl0 <- ylab("")
gr <- background_grid(major = "xy", minor = "xy")
p1 <- ggplot(d, aes(x=fitted2, y=resid2)) + geom_point() + yl + gr
p2 <- ggplot(d, aes(x= DBP_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p3 <- ggplot(d, aes(x= HTN_MED_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p4 <- ggplot(d, aes(sample=resid2)) + geom_qq () + geom_qq_line() + yl + gr
p5 <- ggplot(drr,aes(sample=rr)) + geom_qq () + geom_qq_line() + yl0 + gr
cowplot::plot_grid(p1,p2,p3,p4,p5, align = "h", ncol = 3)

#Good!
# Let's make another forest plot:
#Extracting the coefficients
m2coef <- summary(m2)$coefficients %>%
as.data.frame() %>%
mutate(
Variable = rownames(.),
Estimate = Estimate,
CI_lower = Estimate - 1.96 * `Std. Error`,
CI_upper = Estimate + 1.96 * `Std. Error`
) %>%
filter(Variable != "(Intercept)" & !Variable %in% c(
"SBP_T1_scaled", "CHO_T1_scaled", "PACKYEARS_scaled",
"METABOLIC_DISORDER_T1_scaled", "MWK_VAL_scaled")) %>%
mutate(
Variable = recode(Variable, "HTN_MED_T1_scaled" = "Hypertension Medication Usage",
"DBP_T1_scaled" = "Diastolic Blood Pressure",
"GROUP_SIZE_scaled" = "Group Size",
"NSES_scaled" = "NSES",
"BMI_T1_scaled" = "BMI",
"ALCOHOL_INTAKE_T1_scaled" = "Alcohol Usage"))
#Making the plot:
ggplot(m2coef, aes(x = Estimate, y = Variable, color = Variable)) +
geom_point(size = 3) +
geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
scale_color_manual(values = c("NSES" = "red", "Hypertension Medication Usage" = "blue",
"Group Size" = "red", "Diastolic Blood Pressure" = "blue",
"BMI" = "red", "Alcohol Usage" = "red")) +
labs(
title = "Forest Plot of Standardized Significant Fixed Effects for M2",
x = "Estimate (beta coefficient)",
y = "Variable"
) +
theme_minimal() +
theme(legend.position = "none")

Trying Random slopes for m3 and m4
Note: I decided not to write about this as I still do not completely
understand the random slopes model…
## The resulting explained variance of the random slopes was extremely small and running ranova()
## revealed that they were insignificant in both model 3 and 4
## So I'm not sure if it is just not that applicable in this case, or if I did it wrong
## More models: including random slopes in the model for the 2 significant CVD variables from m1 and m2
# Model 3
m3 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled +
(DBP_T1_scaled | MUNICIPALITY), data = d)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +
## CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +
## ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +
## GROUP_SIZE_scaled + (DBP_T1_scaled | MUNICIPALITY)
## Data: d
##
## REML criterion at convergence: 1088.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1139 -0.6620 -0.0855 0.6279 4.4705
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## MUNICIPALITY (Intercept) 0.023645 0.15377
## DBP_T1_scaled 0.002221 0.04713 -1.00
## Residual 0.397191 0.63023
## Number of obs: 534, groups: MUNICIPALITY, 37
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.58456 0.04070 32.17344 38.929 < 2e-16
## SBP_T1_scaled -0.07254 0.08964 442.24476 -0.809 0.41880
## DBP_T1_scaled -0.21968 0.08650 492.72019 -2.540 0.01140
## HTN_MED_T1_scaled 0.10368 0.03744 465.44767 2.769 0.00584
## CHO_T1_scaled 0.05111 0.03149 516.75944 1.623 0.10514
## BMI_T1_scaled 0.12128 0.04638 448.35720 2.615 0.00923
## PACKYEARS_scaled 0.05661 0.04443 519.17042 1.274 0.20321
## METABOLIC_DISORDER_T1_scaled -0.03163 0.03441 514.20285 -0.919 0.35845
## ALCOHOL_INTAKE_T1_scaled -0.18535 0.04805 482.58525 -3.857 0.00013
## MWK_VAL_scaled -0.02954 0.03359 444.04411 -0.880 0.37956
## NSES_scaled -0.13799 0.03145 517.64243 -4.388 1.39e-05
## GROUP_SIZE_scaled 0.06279 0.02911 514.37070 2.157 0.03149
##
## (Intercept) ***
## SBP_T1_scaled
## DBP_T1_scaled *
## HTN_MED_T1_scaled **
## CHO_T1_scaled
## BMI_T1_scaled **
## PACKYEARS_scaled
## METABOLIC_DISORDER_T1_scaled
## ALCOHOL_INTAKE_T1_scaled ***
## MWK_VAL_scaled
## NSES_scaled ***
## GROUP_SIZE_scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld -0.011
## DBP_T1_scld -0.109 -0.784
## HTN_MED_T1_ -0.049 -0.040 0.052
## CHO_T1_scld -0.007 -0.131 0.035 -0.145
## BMI_T1_scld -0.028 -0.096 -0.214 -0.189 -0.098
## PACKYEARS_s 0.021 -0.068 -0.074 -0.193 0.016 -0.132
## METABOLIC_D 0.011 -0.086 0.059 -0.301 0.081 -0.095 -0.155
## ALCOHOL_INT 0.069 -0.164 -0.182 0.195 -0.042 0.348 -0.356 -0.022
## MWK_VAL_scl -0.010 -0.216 0.136 0.039 0.170 0.017 -0.089 -0.006 -0.187
## NSES_scaled -0.026 0.159 -0.192 0.043 -0.182 0.164 0.149 0.081 -0.099
## GROUP_SIZE_ 0.004 0.017 -0.029 -0.007 0.046 0.018 -0.049 0.002 0.165
## MWK_VA NSES_s
## SBP_T1_scld
## DBP_T1_scld
## HTN_MED_T1_
## CHO_T1_scld
## BMI_T1_scld
## PACKYEARS_s
## METABOLIC_D
## ALCOHOL_INT
## MWK_VAL_scl
## NSES_scaled 0.003
## GROUP_SIZE_ -0.090 -0.049
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#The random effect of the slope of DBP_T1_scaled | MUNICIPALITY is very small
ranova(m3)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (DBP_T1_scaled | MUNICIPALITY)
## npar logLik AIC LRT Df
## <none> 16 -544.22 1120.4
## DBP_T1_scaled in (DBP_T1_scaled | MUNICIPALITY) 14 -544.95 1117.9 1.4619 2
## Pr(>Chisq)
## <none>
## DBP_T1_scaled in (DBP_T1_scaled | MUNICIPALITY) 0.4814
#And non-significant
# Model 4
m4 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled +
(HTN_MED_T1_scaled | MUNICIPALITY), data = d)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +
## CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +
## ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +
## GROUP_SIZE_scaled + (HTN_MED_T1_scaled | MUNICIPALITY)
## Data: d
##
## REML criterion at convergence: 1089.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1758 -0.6512 -0.0619 0.6140 4.4206
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## MUNICIPALITY (Intercept) 0.025303 0.15907
## HTN_MED_T1_scaled 0.001439 0.03793 0.71
## Residual 0.396437 0.62963
## Number of obs: 534, groups: MUNICIPALITY, 37
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.58574 0.04064 33.93460 39.018 < 2e-16
## SBP_T1_scaled -0.08269 0.09055 456.03610 -0.913 0.361643
## DBP_T1_scaled -0.23202 0.08662 510.60752 -2.679 0.007632
## HTN_MED_T1_scaled 0.10410 0.03831 29.41253 2.717 0.010926
## CHO_T1_scaled 0.04977 0.03162 515.99403 1.574 0.116082
## BMI_T1_scaled 0.10722 0.04651 437.80501 2.306 0.021603
## PACKYEARS_scaled 0.06040 0.04456 518.11113 1.355 0.175912
## METABOLIC_DISORDER_T1_scaled -0.03432 0.03453 508.25161 -0.994 0.320744
## ALCOHOL_INTAKE_T1_scaled -0.17219 0.04810 297.30918 -3.580 0.000401
## MWK_VAL_scaled -0.03106 0.03405 459.18432 -0.912 0.362143
## NSES_scaled -0.14448 0.03118 443.27935 -4.634 4.73e-06
## GROUP_SIZE_scaled 0.05813 0.02918 517.44522 1.992 0.046877
##
## (Intercept) ***
## SBP_T1_scaled
## DBP_T1_scaled **
## HTN_MED_T1_scaled *
## CHO_T1_scaled
## BMI_T1_scaled *
## PACKYEARS_scaled
## METABOLIC_DISORDER_T1_scaled
## ALCOHOL_INTAKE_T1_scaled ***
## MWK_VAL_scaled
## NSES_scaled ***
## GROUP_SIZE_scaled *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld 0.010
## DBP_T1_scld -0.036 -0.795
## HTN_MED_T1_ 0.056 -0.045 0.038
## CHO_T1_scld -0.005 -0.137 0.037 -0.157
## BMI_T1_scld 0.000 -0.104 -0.203 -0.189 -0.098
## PACKYEARS_s 0.007 -0.069 -0.073 -0.176 0.012 -0.135
## METABOLIC_D 0.018 -0.080 0.061 -0.297 0.086 -0.096 -0.157
## ALCOHOL_INT 0.049 -0.154 -0.183 0.207 -0.029 0.359 -0.356 -0.026
## MWK_VAL_scl -0.024 -0.216 0.141 0.040 0.154 0.006 -0.086 -0.011 -0.197
## NSES_scaled -0.011 0.151 -0.197 0.038 -0.188 0.148 0.153 0.082 -0.083
## GROUP_SIZE_ 0.013 0.022 -0.033 -0.005 0.046 0.015 -0.054 0.001 0.171
## MWK_VA NSES_s
## SBP_T1_scld
## DBP_T1_scld
## HTN_MED_T1_
## CHO_T1_scld
## BMI_T1_scld
## PACKYEARS_s
## METABOLIC_D
## ALCOHOL_INT
## MWK_VAL_scl
## NSES_scaled 0.004
## GROUP_SIZE_ -0.085 -0.052
#Also small
ranova(m4)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (HTN_MED_T1_scaled | MUNICIPALITY)
## npar logLik AIC
## <none> 16 -544.69 1121.4
## HTN_MED_T1_scaled in (HTN_MED_T1_scaled | MUNICIPALITY) 14 -544.95 1117.9
## LRT Df Pr(>Chisq)
## <none>
## HTN_MED_T1_scaled in (HTN_MED_T1_scaled | MUNICIPALITY) 0.52467 2 0.7693
#And also non-significant