Libraries used

library(readxl) ## For reading Excel files in R
library(dplyr) ## Mutating variables + more
library(kableExtra) ## For making tables
library(ggplot2) ## For making plots
library(tidyverse) ## For making tables
library(lme4) ## For fitting LMM
library(lmerTest)
library(cowplot) ## For making plots to check LMM assumptions

Data cleaning

## Load data
d <-  read_xlsx("Participants_Aggregated_Zipcode2.xlsx")

## Data cleaning
#Data needs cleaning -> many variables are the wrong type and some need to be converted

# Commas to periods & convert to numeric for all continuous variables
chrtonum <- c("SBP_T1", "DBP_T1", "HTN_MED_T1", "CHO_T1", 
                     "BMI_T1", "PACKYEARS", "ALCOHOL_INTAKE_T1", 
                     "MWK_VAL", "NSES", "METABOLIC_DISORDER_T1", "MENTAL_DISORDER_T1")
d <- d %>%
  mutate_at(vars(all_of(chrtonum)), ~ as.numeric(gsub(",", ".", .)))

# Convert categorical variable to factor part 1
municipalities <- c("Appingedam", "Delfzijl", "Groningen", "Loppersum", "Stadskanaal", "Veendam",
  "Pekela", "Oldambt", "Westerwolde", "Midden-Groningen", "Het Hogeland",
  "Westerkwartier", "Achtkarspelen", "De Wolden", "Ameland", "Harlingen", "Heerenveen",
  "Leeuwarden", "Ooststellingwerf", "Opsterland", "Schiermonnikoog",
  "Smallingerland", "Terschelling", "Vlieland", "Weststellingwerf",
  "Tytsjerksteradiel", "Dantumadiel", "Súdwest-Fryslân", "De Fryske Marren",
  "Waadhoeke", "Noardeast-Fryslân", "Assen", "Coevorden", "Emmen", "Hoogeveen",
  "Meppel", "Aa en Hunze", "Borger-Odoorn", "Noordenveld",
  "Westerveld", "Tynaarlo", "Midden-Drenthe")
d$MUNICIPALITY <- factor(d$MUNICIPALITY, levels = municipalities)

# Convert categorical variable to factor part 2
d <- d %>%
  mutate(GROUP_SIZE_CAT = factor(GROUP_SIZE_CAT,
                                 levels = c(1, 2, 3, 4, 5, 6, 7),
                                 labels = c("1", "2", "3", "4", "5", "6", "7")))

# Creating new variable out of GROUP_SIZE_CAT to account for actual group size
d <- d %>%
  mutate(GROUP_SIZE = case_when(
    GROUP_SIZE_CAT == 1 ~ 100,
    GROUP_SIZE_CAT == 2 ~ 250,
    GROUP_SIZE_CAT == 3 ~ 350,
    GROUP_SIZE_CAT == 4 ~ 450,
    GROUP_SIZE_CAT == 5 ~ 550,
    GROUP_SIZE_CAT == 6 ~ 650,
    GROUP_SIZE_CAT == 7 ~ 750,
    TRUE ~ NA_real_))

# Convert categorical variable to factor part 3
d$ZIPCODE <- factor(as.character(d$ZIPCODE), levels = unique(as.character(d$ZIPCODE)))

# Let's check the data again
str(d)

## tibble [534 × 18] (S3: tbl_df/tbl/data.frame)
##  $ MUNICIPALITY         : Factor w/ 42 levels "Appingedam","Delfzijl",..: 33 33 33 33 33 33 34 34 34 34 ...
##  $ ZIPCODE              : Factor w/ 293 levels "7741","7742",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ From                 : num [1:534] 7740 7740 7740 7740 7740 7740 7760 7760 7760 7760 ...
##  $ To                   : num [1:534] 7756 7756 7756 7756 7756 ...
##  $ PROVINCE             : chr [1:534] "Drenthe" "Drenthe" "Drenthe" "Drenthe" ...
##  $ GROUP_SIZE_CAT       : Factor w/ 7 levels "1","2","3","4",..: 2 3 1 2 1 2 1 2 1 1 ...
##  $ BMI_T1               : num [1:534] 26.2 25.9 26.7 26 26.1 ...
##  $ DBP_T1               : num [1:534] 78.3 73.3 77.4 72.5 77.3 ...
##  $ SBP_T1               : num [1:534] 132 125 133 123 132 ...
##  $ HTN_MED_T1           : num [1:534] 17.8 11.4 11.5 10.7 13.5 ...
##  $ CHO_T1               : num [1:534] 5.23 5.1 5.2 4.94 5.17 5.19 5.14 4.87 5.08 4.9 ...
##  $ PACKYEARS            : num [1:534] 8.11 6.74 7.74 4.49 5.73 5.78 6.83 4.67 8.74 6.78 ...
##  $ METABOLIC_DISORDER_T1: num [1:534] 3.03 2.34 3.11 3.38 3.06 1.72 4.97 3.57 5.62 2.44 ...
##  $ ALCOHOL_INTAKE_T1    : num [1:534] 11.01 5 10.62 4.71 9.89 ...
##  $ MWK_VAL              : num [1:534] 609 447 506 453 337 ...
##  $ NSES                 : num [1:534] -1.35 -1.35 -1.19 -1.2 -0.22 -0.23 -0.1 -0.11 -0.84 -0.83 ...
##  $ MENTAL_DISORDER_T1   : num [1:534] 1.14 2.51 1.69 1.96 0.76 1.34 1.14 1.47 0.17 3.38 ...
##  $ GROUP_SIZE           : num [1:534] 250 350 100 250 100 250 100 250 100 100 ...

#Looks better!
#Variables 'From', 'To', 'PROVINCE', and 'GROUP_SIZE_CAT' will not be used for this analysis

# Check for missing values:
colSums(is.na(d))

##          MUNICIPALITY               ZIPCODE                  From 
##                     0                     0                     0 
##                    To              PROVINCE        GROUP_SIZE_CAT 
##                     0                     0                     0 
##                BMI_T1                DBP_T1                SBP_T1 
##                     0                     0                     0 
##            HTN_MED_T1                CHO_T1             PACKYEARS 
##                     0                     0                     0 
## METABOLIC_DISORDER_T1     ALCOHOL_INTAKE_T1               MWK_VAL 
##                     0                     0                     0 
##                  NSES    MENTAL_DISORDER_T1            GROUP_SIZE 
##                     0                     0                     0

#No missing values

Descriptive statistics

## Descriptives
#calculating zipcodes per municipality
zipcode_counts <- d %>%
  group_by(MUNICIPALITY) %>%
  summarize(zipcodes_per_municipality = n())

#Making table with all variables + descriptive stats
summary_stats <- d %>%
  summarize(
    `Number of zipcodes` = (nrow(d)),
    `Number of municipalities` = n_distinct(MUNICIPALITY),
    `Median number of zipcodes per municipality` = median(zipcode_counts$zipcodes_per_municipality),
    `Median group size` = median(GROUP_SIZE),
    `Mean Mental Disorder (%)` = mean(MENTAL_DISORDER_T1),
    `Mean SBP (mmHg)` = mean(SBP_T1),
    `Mean DBP (mmHg)` = mean(DBP_T1),
    `Mean HTN Medication Usage (%)` = mean(HTN_MED_T1),
    `Mean Cholesterol (mmol/L)` = mean(CHO_T1),
    `Mean BMI` = mean(BMI_T1),
    `Mean Packyears` = mean(PACKYEARS),
    `Mean Alcohol Intake (g)` = mean(ALCOHOL_INTAKE_T1),
    `Mean Weekly Physical Activity (minutes)` = mean(MWK_VAL),
    `Mean Metabolic Disorder (%)` = mean(METABOLIC_DISORDER_T1),
    `Mean NSES (%)` = mean(NSES),
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Statistic",
    values_to = "Value"
  )

summary_stats %>%
  kbl(caption = "Table of all variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)

Table of all variables
Statistic	Value
Number of zipcodes	534.0000000
Number of municipalities	37.0000000
Median number of zipcodes per municipality	12.0000000
Median group size	100.0000000
Mean Mental Disorder (%)	1.6171161
Mean SBP (mmHg)	125.2363296
Mean DBP (mmHg)	73.4533895
Mean HTN Medication Usage (%)	12.2505993
Mean Cholesterol (mmol/L)	5.0573596
Mean BMI	25.7698502
Mean Packyears	6.0744007
Mean Alcohol Intake (g)	7.4196629
Mean Weekly Physical Activity (minutes)	502.4408614
Mean Metabolic Disorder (%)	2.9451873
Mean NSES (%)	-0.7029401

#Making separate table for MUNICIPALITY to show how many zipcodes are included per municipality
#Municipalities with no zipcodes at all are not taken into account in the analysis
municipality_counts <- table(d$MUNICIPALITY)
municipality_df <- as.data.frame(municipality_counts)
colnames(municipality_df) <- c("Municipality", "Count")
kbl(municipality_df, caption = "Count of zipcodes by municipality") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)

Count of zipcodes by municipality
Municipality	Count
Appingedam	6
Delfzijl	7
Groningen	56
Loppersum	6
Stadskanaal	14
Veendam	12
Pekela	4
Oldambt	22
Westerwolde	4
Midden-Groningen	15
Het Hogeland	16
Westerkwartier	16
Achtkarspelen	12
De Wolden	3
Ameland	0
Harlingen	4
Heerenveen	16
Leeuwarden	44
Ooststellingwerf	0
Opsterland	14
Schiermonnikoog	0
Smallingerland	25
Terschelling	3
Vlieland	0
Weststellingwerf	15
Tytsjerksteradiel	17
Dantumadiel	6
Súdwest-Fryslân	34
De Fryske Marren	12
Waadhoeke	10
Noardeast-Fryslân	8
Assen	0
Coevorden	6
Emmen	45
Hoogeveen	18
Meppel	6
Aa en Hunze	6
Borger-Odoorn	11
Noordenveld	7
Westerveld	1
Tynaarlo	9
Midden-Drenthe	24

#Scatterplots to examine the basic relationships between mental health disorders and cardiovascular risk factors
ggplot(d, aes(x = SBP_T1, y = MENTAL_DISORDER_T1)) + 
  geom_point() + 
  theme_minimal() +
  xlab("SBP (mmHg)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = DBP_T1, y = MENTAL_DISORDER_T1)) + 
  geom_point() + 
  theme_minimal() +
  xlab("DBP (mmHg)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = HTN_MED_T1, y = MENTAL_DISORDER_T1)) + 
  geom_point() + 
  theme_minimal() +
  xlab("HTN Medication Usage (%)") + ylab("Mental disorders (%)")

ggplot(d, aes(x = CHO_T1, y = MENTAL_DISORDER_T1)) + 
  geom_point() + 
  theme_minimal() +
  xlab("Cholesterol (mmol/L)") + ylab("Mental disorders (%)")

Fitting LMM 1

## Fitting the LMMs
#Two Models will be fitted initially
#Model 1 with mental illness as the dependent and CVD risk factors as the independent variables
#Model 2 with lifestyle-related factors and socio-demographic variables as confounders
#However, some of the variables are in different scales. We will first standardize the data by calculating z-scores
#function scale() does this for us
d <- d %>%
  mutate(
    SBP_T1_scaled = scale(SBP_T1),
    DBP_T1_scaled = scale(DBP_T1),
    HTN_MED_T1_scaled = scale(HTN_MED_T1),
    CHO_T1_scaled = scale(CHO_T1),
    BMI_T1_scaled = scale(BMI_T1),
    PACKYEARS_scaled = scale(PACKYEARS),
    METABOLIC_DISORDER_T1_scaled = scale(METABOLIC_DISORDER_T1),
    ALCOHOL_INTAKE_T1_scaled = scale(ALCOHOL_INTAKE_T1),
    MWK_VAL_scaled = scale(MWK_VAL),
    NSES_scaled = scale(NSES),
    GROUP_SIZE_scaled = scale(GROUP_SIZE)
  )

# Model 1
m1 <- lmer(MENTAL_DISORDER_T1~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + (1 | MUNICIPALITY), data = d)
summary(m1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +  
##     CHO_T1_scaled + (1 | MUNICIPALITY)
##    Data: d
## 
## REML criterion at convergence: 1122.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2641 -0.6883 -0.0669  0.6549  4.3969 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  MUNICIPALITY (Intercept) 0.07143  0.2673  
##  Residual                 0.43097  0.6565  
## Number of obs: 534, groups:  MUNICIPALITY, 37
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)         1.58956    0.05607  30.42917  28.351  < 2e-16 ***
## SBP_T1_scaled      -0.08634    0.08806 527.64758  -0.981 0.327277    
## DBP_T1_scaled      -0.31929    0.08678 528.66293  -3.679 0.000258 ***
## HTN_MED_T1_scaled   0.22217    0.03409 511.58303   6.518 1.71e-10 ***
## CHO_T1_scaled       0.03849    0.03216 528.95096   1.197 0.231846    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SBP_T1 DBP_T1 HTN_ME
## SBP_T1_scld  0.027                     
## DBP_T1_scld -0.035 -0.923              
## HTN_MED_T1_ -0.072 -0.140  0.088       
## CHO_T1_scld -0.009 -0.078 -0.025 -0.156

#The variance of the residual is high compared to that of MUNICIPALITY
#This could mean that there are other factors that could explain this variance
#For the fixed effects, SBP_T1 and CHO_T1 are not significant. 
#DBP seems to have an inverse relationship with MH -> coefficient of -0.31929
#HTN_MED has a positive relationship -> coefficient of 0.22217

# Testing random effect
ranova(m1)

## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + (1 | MUNICIPALITY)
##                    npar  logLik    AIC   LRT Df Pr(>Chisq)    
## <none>                7 -561.47 1137.0                        
## (1 | MUNICIPALITY)    6 -580.68 1173.3 38.41  1  5.735e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Highly significant, indicating there is a difference between municipalities

# Residuals plot
plot(m1)

#Data has outliers but overall the assumptions of the model are fulfilled, points are clustered around 0

# More plots
d$resid2 <- resid(m1)
d$fitted2 <- fitted(m1)
rr <- ranef(m1)$MUNICIPALITY[,1]
drr <- data.frame(rr=rr)
yl <- ylab("residuals")
yl0 <- ylab("")
gr <- background_grid(major = "xy", minor = "xy")
p1 <- ggplot(d, aes(x=fitted2, y=resid2)) + geom_point() + yl + gr
p2 <- ggplot(d, aes(x= DBP_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p3 <- ggplot(d, aes(x= HTN_MED_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p4 <- ggplot(d, aes(sample=resid2)) + geom_qq () + geom_qq_line() + yl + gr
p5 <- ggplot(drr,aes(sample=rr)) + geom_qq () + geom_qq_line() + yl0 + gr
cowplot::plot_grid(p1,p2,p3,p4,p5, align = "h", ncol = 3)

#Looks fine!

# Making forest plot to visualize the results:
#Extracting the coefficients
m1coef <- summary(m1)$coefficients %>%
  as.data.frame() %>%
  mutate(
    Variable = rownames(.),
    Estimate = Estimate,
    CI_lower = Estimate - 1.96 * `Std. Error`,
    CI_upper = Estimate + 1.96 * `Std. Error`
  ) %>%
  filter(Variable != "(Intercept)" & !Variable %in% c("SBP_T1_scaled", "CHO_T1_scaled")) %>%
  mutate(
    Variable = recode(Variable, "HTN_MED_T1_scaled" = "Hypertension Medication Usage",
                      "DBP_T1_scaled" = "Diastolic Blood Pressure")) 
#Making the plot:
ggplot(m1coef, aes(x = Estimate, y = Variable)) +
  geom_point(color = "blue", size = 3) +
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.1, color = "blue") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  labs(
    title = "Forest Plot of Standardized Significant Fixed Effects for M1",
    x = "Estimate (beta coefficient)",
    y = "Variable"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Fitting LMM 2

# Model 2
m2 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
             BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
             MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (1 | MUNICIPALITY), data = d)
summary(m2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +  
##     CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +  
##     ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +  
##     GROUP_SIZE_scaled + (1 | MUNICIPALITY)
##    Data: d
## 
## REML criterion at convergence: 1089.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1709 -0.6426 -0.0708  0.6198  4.3919 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  MUNICIPALITY (Intercept) 0.0239   0.1546  
##  Residual                 0.3987   0.6314  
## Number of obs: 534, groups:  MUNICIPALITY, 37
## 
## Fixed effects:
##                               Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                    1.58197    0.04039  33.16757  39.164  < 2e-16
## SBP_T1_scaled                 -0.08278    0.09076 459.96939  -0.912 0.362177
## DBP_T1_scaled                 -0.22620    0.08647 510.25091  -2.616 0.009163
## HTN_MED_T1_scaled              0.10935    0.03761 477.79638   2.908 0.003809
## CHO_T1_scaled                  0.04945    0.03160 519.69310   1.565 0.118255
## BMI_T1_scaled                  0.11316    0.04658 473.41623   2.429 0.015503
## PACKYEARS_scaled               0.05995    0.04460 521.34447   1.344 0.179460
## METABOLIC_DISORDER_T1_scaled  -0.03285    0.03449 515.33426  -0.952 0.341371
## ALCOHOL_INTAKE_T1_scaled      -0.18015    0.04826 475.16061  -3.733 0.000212
## MWK_VAL_scaled                -0.02976    0.03410 500.87144  -0.873 0.383242
## NSES_scaled                   -0.14334    0.03132 520.52911  -4.577 5.89e-06
## GROUP_SIZE_scaled              0.05902    0.02918 519.32017   2.022 0.043644
##                                 
## (Intercept)                  ***
## SBP_T1_scaled                   
## DBP_T1_scaled                ** 
## HTN_MED_T1_scaled            ** 
## CHO_T1_scaled                   
## BMI_T1_scaled                *  
## PACKYEARS_scaled                
## METABOLIC_DISORDER_T1_scaled    
## ALCOHOL_INTAKE_T1_scaled     ***
## MWK_VAL_scaled                  
## NSES_scaled                  ***
## GROUP_SIZE_scaled            *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld  0.006                                                        
## DBP_T1_scld -0.037 -0.794                                                 
## HTN_MED_T1_ -0.052 -0.051  0.052                                          
## CHO_T1_scld -0.006 -0.135  0.040 -0.159                                   
## BMI_T1_scld -0.006 -0.104 -0.204 -0.187 -0.096                            
## PACKYEARS_s  0.009 -0.071 -0.071 -0.180  0.013 -0.138                     
## METABOLIC_D  0.015 -0.085  0.065 -0.309  0.085 -0.095 -0.156              
## ALCOHOL_INT  0.057 -0.160 -0.179  0.195 -0.036  0.359 -0.354 -0.026       
## MWK_VAL_scl -0.022 -0.213  0.138  0.043  0.157  0.009 -0.087 -0.008 -0.196
## NSES_scaled -0.015  0.153 -0.195  0.039 -0.187  0.153  0.151  0.081 -0.089
## GROUP_SIZE_  0.013  0.020 -0.033 -0.002  0.049  0.017 -0.056  0.002  0.172
##             MWK_VA NSES_s
## SBP_T1_scld              
## DBP_T1_scld              
## HTN_MED_T1_              
## CHO_T1_scld              
## BMI_T1_scld              
## PACKYEARS_s              
## METABOLIC_D              
## ALCOHOL_INT              
## MWK_VAL_scl              
## NSES_scaled  0.006       
## GROUP_SIZE_ -0.083 -0.051

#In this model, even less of the variance is explained by the random MUNICIPALITY intercept
#For the fixed effects, SBP_T1 and CHO_T1 are still not significant. 
#The coefficients are lower for both DBP_T1 (B = -0.22620) and HTN_MED_T1 (B = 0.10935) than in M1
#Some of the control variables also are significant: GROUP_SIZE, NSES, ALCOHOL_INTAKE_T1, and BMI_T1
#AIC is also slightly better, indicating better model fit: 1136 (m1) vs 1117 (m2)

# Testing random effect again
ranova(m2)

## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (1 | MUNICIPALITY)
##                    npar  logLik    AIC    LRT Df Pr(>Chisq)    
## <none>               14 -544.95 1117.9                         
## (1 | MUNICIPALITY)   13 -550.98 1128.0 12.061  1  0.0005148 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Again highly significant, indicating there is a difference between municipalities

# Residuals plot
plot(m2)

#Again overall the assumptions of the model are fulfilled, points are clustered around 0

# More plots v2
d$resid2 <- resid(m2)
d$fitted2 <- fitted(m2)
rr <- ranef(m1)$MUNICIPALITY[,1]
drr <- data.frame(rr=rr)
yl <- ylab("residuals")
yl0 <- ylab("")
gr <- background_grid(major = "xy", minor = "xy")
p1 <- ggplot(d, aes(x=fitted2, y=resid2)) + geom_point() + yl + gr
p2 <- ggplot(d, aes(x= DBP_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p3 <- ggplot(d, aes(x= HTN_MED_T1_scaled, y=resid2)) + geom_point() + yl0 + gr
p4 <- ggplot(d, aes(sample=resid2)) + geom_qq () + geom_qq_line() + yl + gr
p5 <- ggplot(drr,aes(sample=rr)) + geom_qq () + geom_qq_line() + yl0 + gr
cowplot::plot_grid(p1,p2,p3,p4,p5, align = "h", ncol = 3)

#Good!

# Let's make another forest plot:
#Extracting the coefficients
m2coef <- summary(m2)$coefficients %>%
  as.data.frame() %>%
  mutate(
    Variable = rownames(.),
    Estimate = Estimate,
    CI_lower = Estimate - 1.96 * `Std. Error`,
    CI_upper = Estimate + 1.96 * `Std. Error`
  ) %>%
  filter(Variable != "(Intercept)" & !Variable %in% c(
    "SBP_T1_scaled", "CHO_T1_scaled", "PACKYEARS_scaled", 
    "METABOLIC_DISORDER_T1_scaled", "MWK_VAL_scaled")) %>%
  mutate(
    Variable = recode(Variable, "HTN_MED_T1_scaled" = "Hypertension Medication Usage",
                      "DBP_T1_scaled" = "Diastolic Blood Pressure",
                      "GROUP_SIZE_scaled" = "Group Size",
                      "NSES_scaled" = "NSES",
                      "BMI_T1_scaled" = "BMI",
                      "ALCOHOL_INTAKE_T1_scaled" = "Alcohol Usage")) 
#Making the plot:
ggplot(m2coef, aes(x = Estimate, y = Variable, color = Variable)) +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  scale_color_manual(values = c("NSES" = "red", "Hypertension Medication Usage" = "blue",
                                "Group Size" = "red", "Diastolic Blood Pressure" = "blue",
                                "BMI" = "red", "Alcohol Usage" = "red")) +
  labs(
    title = "Forest Plot of Standardized Significant Fixed Effects for M2",
    x = "Estimate (beta coefficient)",
    y = "Variable"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Trying Random slopes for m3 and m4

Note: I decided not to write about this as I still do not completely understand the random slopes model…

## The resulting explained variance of the random slopes was extremely small and running ranova()
## revealed that they were insignificant in both model 3 and 4
## So I'm not sure if it is just not that applicable in this case, or if I did it wrong

## More models: including random slopes in the model for the 2 significant CVD variables from m1 and m2
# Model 3
m3 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
             BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
             MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + 
             (DBP_T1_scaled | MUNICIPALITY), data = d)
summary(m3)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +  
##     CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +  
##     ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +  
##     GROUP_SIZE_scaled + (DBP_T1_scaled | MUNICIPALITY)
##    Data: d
## 
## REML criterion at convergence: 1088.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1139 -0.6620 -0.0855  0.6279  4.4705 
## 
## Random effects:
##  Groups       Name          Variance Std.Dev. Corr 
##  MUNICIPALITY (Intercept)   0.023645 0.15377       
##               DBP_T1_scaled 0.002221 0.04713  -1.00
##  Residual                   0.397191 0.63023       
## Number of obs: 534, groups:  MUNICIPALITY, 37
## 
## Fixed effects:
##                               Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                    1.58456    0.04070  32.17344  38.929  < 2e-16
## SBP_T1_scaled                 -0.07254    0.08964 442.24476  -0.809  0.41880
## DBP_T1_scaled                 -0.21968    0.08650 492.72019  -2.540  0.01140
## HTN_MED_T1_scaled              0.10368    0.03744 465.44767   2.769  0.00584
## CHO_T1_scaled                  0.05111    0.03149 516.75944   1.623  0.10514
## BMI_T1_scaled                  0.12128    0.04638 448.35720   2.615  0.00923
## PACKYEARS_scaled               0.05661    0.04443 519.17042   1.274  0.20321
## METABOLIC_DISORDER_T1_scaled  -0.03163    0.03441 514.20285  -0.919  0.35845
## ALCOHOL_INTAKE_T1_scaled      -0.18535    0.04805 482.58525  -3.857  0.00013
## MWK_VAL_scaled                -0.02954    0.03359 444.04411  -0.880  0.37956
## NSES_scaled                   -0.13799    0.03145 517.64243  -4.388 1.39e-05
## GROUP_SIZE_scaled              0.06279    0.02911 514.37070   2.157  0.03149
##                                 
## (Intercept)                  ***
## SBP_T1_scaled                   
## DBP_T1_scaled                *  
## HTN_MED_T1_scaled            ** 
## CHO_T1_scaled                   
## BMI_T1_scaled                ** 
## PACKYEARS_scaled                
## METABOLIC_DISORDER_T1_scaled    
## ALCOHOL_INTAKE_T1_scaled     ***
## MWK_VAL_scaled                  
## NSES_scaled                  ***
## GROUP_SIZE_scaled            *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld -0.011                                                        
## DBP_T1_scld -0.109 -0.784                                                 
## HTN_MED_T1_ -0.049 -0.040  0.052                                          
## CHO_T1_scld -0.007 -0.131  0.035 -0.145                                   
## BMI_T1_scld -0.028 -0.096 -0.214 -0.189 -0.098                            
## PACKYEARS_s  0.021 -0.068 -0.074 -0.193  0.016 -0.132                     
## METABOLIC_D  0.011 -0.086  0.059 -0.301  0.081 -0.095 -0.155              
## ALCOHOL_INT  0.069 -0.164 -0.182  0.195 -0.042  0.348 -0.356 -0.022       
## MWK_VAL_scl -0.010 -0.216  0.136  0.039  0.170  0.017 -0.089 -0.006 -0.187
## NSES_scaled -0.026  0.159 -0.192  0.043 -0.182  0.164  0.149  0.081 -0.099
## GROUP_SIZE_  0.004  0.017 -0.029 -0.007  0.046  0.018 -0.049  0.002  0.165
##             MWK_VA NSES_s
## SBP_T1_scld              
## DBP_T1_scld              
## HTN_MED_T1_              
## CHO_T1_scld              
## BMI_T1_scld              
## PACKYEARS_s              
## METABOLIC_D              
## ALCOHOL_INT              
## MWK_VAL_scl              
## NSES_scaled  0.003       
## GROUP_SIZE_ -0.090 -0.049
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

#The random effect of the slope of DBP_T1_scaled | MUNICIPALITY is very small
ranova(m3)

## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (DBP_T1_scaled | MUNICIPALITY)
##                                                 npar  logLik    AIC    LRT Df
## <none>                                            16 -544.22 1120.4          
## DBP_T1_scaled in (DBP_T1_scaled | MUNICIPALITY)   14 -544.95 1117.9 1.4619  2
##                                                 Pr(>Chisq)
## <none>                                                    
## DBP_T1_scaled in (DBP_T1_scaled | MUNICIPALITY)     0.4814

#And non-significant

# Model 4
m4 <- lmer(MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled +
             BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled +
             MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + 
             (HTN_MED_T1_scaled | MUNICIPALITY), data = d)
summary(m4)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled +  
##     CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled +  
##     ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled +  
##     GROUP_SIZE_scaled + (HTN_MED_T1_scaled | MUNICIPALITY)
##    Data: d
## 
## REML criterion at convergence: 1089.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1758 -0.6512 -0.0619  0.6140  4.4206 
## 
## Random effects:
##  Groups       Name              Variance Std.Dev. Corr
##  MUNICIPALITY (Intercept)       0.025303 0.15907      
##               HTN_MED_T1_scaled 0.001439 0.03793  0.71
##  Residual                       0.396437 0.62963      
## Number of obs: 534, groups:  MUNICIPALITY, 37
## 
## Fixed effects:
##                               Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                    1.58574    0.04064  33.93460  39.018  < 2e-16
## SBP_T1_scaled                 -0.08269    0.09055 456.03610  -0.913 0.361643
## DBP_T1_scaled                 -0.23202    0.08662 510.60752  -2.679 0.007632
## HTN_MED_T1_scaled              0.10410    0.03831  29.41253   2.717 0.010926
## CHO_T1_scaled                  0.04977    0.03162 515.99403   1.574 0.116082
## BMI_T1_scaled                  0.10722    0.04651 437.80501   2.306 0.021603
## PACKYEARS_scaled               0.06040    0.04456 518.11113   1.355 0.175912
## METABOLIC_DISORDER_T1_scaled  -0.03432    0.03453 508.25161  -0.994 0.320744
## ALCOHOL_INTAKE_T1_scaled      -0.17219    0.04810 297.30918  -3.580 0.000401
## MWK_VAL_scaled                -0.03106    0.03405 459.18432  -0.912 0.362143
## NSES_scaled                   -0.14448    0.03118 443.27935  -4.634 4.73e-06
## GROUP_SIZE_scaled              0.05813    0.02918 517.44522   1.992 0.046877
##                                 
## (Intercept)                  ***
## SBP_T1_scaled                   
## DBP_T1_scaled                ** 
## HTN_MED_T1_scaled            *  
## CHO_T1_scaled                   
## BMI_T1_scaled                *  
## PACKYEARS_scaled                
## METABOLIC_DISORDER_T1_scaled    
## ALCOHOL_INTAKE_T1_scaled     ***
## MWK_VAL_scaled                  
## NSES_scaled                  ***
## GROUP_SIZE_scaled            *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) SBP_T1 DBP_T1 HTN_ME CHO_T1 BMI_T1 PACKYE METABO ALCOHO
## SBP_T1_scld  0.010                                                        
## DBP_T1_scld -0.036 -0.795                                                 
## HTN_MED_T1_  0.056 -0.045  0.038                                          
## CHO_T1_scld -0.005 -0.137  0.037 -0.157                                   
## BMI_T1_scld  0.000 -0.104 -0.203 -0.189 -0.098                            
## PACKYEARS_s  0.007 -0.069 -0.073 -0.176  0.012 -0.135                     
## METABOLIC_D  0.018 -0.080  0.061 -0.297  0.086 -0.096 -0.157              
## ALCOHOL_INT  0.049 -0.154 -0.183  0.207 -0.029  0.359 -0.356 -0.026       
## MWK_VAL_scl -0.024 -0.216  0.141  0.040  0.154  0.006 -0.086 -0.011 -0.197
## NSES_scaled -0.011  0.151 -0.197  0.038 -0.188  0.148  0.153  0.082 -0.083
## GROUP_SIZE_  0.013  0.022 -0.033 -0.005  0.046  0.015 -0.054  0.001  0.171
##             MWK_VA NSES_s
## SBP_T1_scld              
## DBP_T1_scld              
## HTN_MED_T1_              
## CHO_T1_scld              
## BMI_T1_scld              
## PACKYEARS_s              
## METABOLIC_D              
## ALCOHOL_INT              
## MWK_VAL_scl              
## NSES_scaled  0.004       
## GROUP_SIZE_ -0.085 -0.052

#Also small
ranova(m4)

## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## MENTAL_DISORDER_T1 ~ SBP_T1_scaled + DBP_T1_scaled + HTN_MED_T1_scaled + CHO_T1_scaled + BMI_T1_scaled + PACKYEARS_scaled + METABOLIC_DISORDER_T1_scaled + ALCOHOL_INTAKE_T1_scaled + MWK_VAL_scaled + NSES_scaled + GROUP_SIZE_scaled + (HTN_MED_T1_scaled | MUNICIPALITY)
##                                                         npar  logLik    AIC
## <none>                                                    16 -544.69 1121.4
## HTN_MED_T1_scaled in (HTN_MED_T1_scaled | MUNICIPALITY)   14 -544.95 1117.9
##                                                             LRT Df Pr(>Chisq)
## <none>                                                                       
## HTN_MED_T1_scaled in (HTN_MED_T1_scaled | MUNICIPALITY) 0.52467  2     0.7693

#And also non-significant

AdvStats code+output

Jesse Lindenhovius

2024-06-07

Libraries used

Data cleaning

Descriptive statistics

Fitting LMM 1

Fitting LMM 2

Trying Random slopes for m3 and m4

Note: I decided not to write about this as I still do not completely understand the random slopes model…