library(haven)
library(lmerTest)
library(tidyverse)
library(robustlmm)
library(emmeans)
library(easystats)
options(scipen = 999)LMM-analyse
Packages
Import SAV file
lmer_long_sav <- read_sav("/Volumes/datafiler/long_data.sav")# Remove SPSS noise :D
lmer_long_sav$AB_kode <- as.factor(lmer_long_sav$AB_kode)
lmer_long_sav$Sex <- as.factor(lmer_long_sav$Sex)
lmer_long_sav$PP_L <- as.factor(lmer_long_sav$PP_L)
lmer_long_sav$ITT_L <- as.factor(lmer_long_sav$ITT_L)Graphs raw data
Pain over time pr. patient
@fig_all_pain_courses shows a graphical overview of all patients development
Code
lmer_long_sav |>
arrange(ID, Time) |>
ggplot(aes(x = Time, y = Leg, group = ID, color = AB_kode)) +
geom_line() +
facet_wrap(~ ID) +
labs(x = "", y = "", title = "") +
theme_minimal() +
theme(axis.text.x = element_blank(), # remove x axis labels
axis.ticks.x = element_blank(), # remove x axis ticks
axis.text.y = element_blank(), # remove y axis labels
axis.ticks.y = element_blank() # remove y axis ticks
)Pain - time - groups
@fig_pain_course_groups shows a graphical overview of the groups development for pain
Code
lmer_long_sav |>
group_by(Time, AB_kode) |>
summarise(mean_pain = mean(Leg, na.rm = T),
sd = sd(Leg, na.rm = T),
n = sum(!is.na(Leg))) |>
mutate(se = sd / sqrt(n),
lower.ci = mean_pain - qt(1 - (0.05 / 2), n - 1) * se,
upper.ci = mean_pain + qt(1 - (0.05 / 2), n - 1) * se) |>
ggplot(aes(x = Time, y = mean_pain, color = AB_kode)) +
geom_line(size = 1) +
geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.08, size = 0) +
scale_y_continuous(breaks = 0:10, limits = c(0, 10)) +
scale_x_continuous(breaks = 1:10) +
theme_classic() +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) +
labs(
x = "Days",
y = "Mean pain Intensity",
title = "Mean pain over time (95% CI) - Raw scores",
color = "Group"
)Pain DIFFERENCE groups data
Code
lmer_long_sav %>%
group_by(Time, AB_kode) %>%
summarise(mean_pain = mean(Leg, na.rm = TRUE)) %>%
pivot_wider(names_from = AB_kode, values_from = mean_pain) %>%
rename(Group_One = "1",
Group_Two = "2") |>
summarise(difference = Group_Two - Group_One) |>
ggplot(aes(x = Time, y = difference)) +
geom_line(size = 1) +
theme_classic() +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) +
scale_x_continuous(breaks = 1:10) +
scale_y_continuous(limits = c(-1, 0)) +
labs(
x = "Days",
y = "Mean pain difference \n(B - A)",
title = "",
color = ""
)Linear mixed models
Nelder_Mead optimizer was chosen as this was the only working for the latter complex models
SAP model with random intercept only (no random slope for time)
sap_model <- lmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time +
Leg_0 + Age + Sex +
(1 | ID),
REML = F,
control = lmerControl(optimizer = "Nelder_Mead"),
data = lmer_long_sav)
summary(sap_model)Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex +
(1 | ID)
Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")
AIC BIC logLik deviance df.resid
3818.8 3864.2 -1900.4 3800.8 1134
Scaled residuals:
Min 1Q Median 3Q Max
-5.6351 -0.5413 0.0141 0.5306 4.0320
Random effects:
Groups Name Variance Std.Dev.
ID (Intercept) 1.282 1.132
Residual 1.272 1.128
Number of obs: 1143, groups: ID, 120
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 1.439382 0.675307 126.246998 2.131 0.0350
AB_kode2 -0.328351 0.254104 212.707701 -1.292 0.1977
Time -0.011181 0.016462 1027.418997 -0.679 0.4972
Leg_0 0.850340 0.078110 121.722733 10.886 <0.0000000000000002
Age -0.010597 0.009899 121.281680 -1.070 0.2865
Sex2 -0.401505 0.223855 120.628453 -1.794 0.0754
AB_kode2:Time -0.053907 0.023405 1028.006142 -2.303 0.0215
(Intercept) *
AB_kode2
Time
Leg_0 ***
Age
Sex2 .
AB_kode2:Time *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) AB_kd2 Time Leg_0 Age Sex2
AB_kode2 -0.093
Time -0.139 0.353
Leg_0 -0.766 -0.036 0.008
Age -0.608 -0.118 0.000 0.052
Sex2 0.028 0.052 -0.003 -0.117 -0.148
AB_kode2:Tm 0.103 -0.500 -0.703 -0.007 -0.007 -0.001
SAP with random intercept + random slope
sap_rs_model <- lmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time +
Leg_0 + Age + Sex +
(1 + Time | ID),
REML = F,
control = lmerControl(optimizer = "Nelder_Mead"),
data = lmer_long_sav)
summary(sap_rs_model)Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex +
(1 + Time | ID)
Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")
AIC BIC logLik deviance df.resid
3695.0 3750.5 -1836.5 3673.0 1132
Scaled residuals:
Min 1Q Median 3Q Max
-5.4713 -0.4498 0.0131 0.4872 4.5935
Random effects:
Groups Name Variance Std.Dev. Corr
ID (Intercept) 0.64615 0.8038
Time 0.02594 0.1611 -0.08
Residual 1.03455 1.0171
Number of obs: 1143, groups: ID, 120
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 1.043189 0.557020 121.835848 1.873 0.0635 .
AB_kode2 -0.348347 0.198224 119.555415 -1.757 0.0814 .
Time -0.008587 0.025576 121.053456 -0.336 0.7376
Leg_0 0.849844 0.064770 119.974237 13.121 <0.0000000000000002 ***
Age -0.002285 0.008228 120.646799 -0.278 0.7817
Sex2 -0.291369 0.186044 119.892161 -1.566 0.1200
AB_kode2:Time -0.053564 0.036455 120.113414 -1.469 0.1444
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) AB_kd2 Time Leg_0 Age Sex2
AB_kode2 -0.075
Time -0.095 0.274
Leg_0 -0.766 -0.043 -0.009
Age -0.612 -0.122 0.005 0.050
Sex2 0.029 0.056 0.005 -0.121 -0.146
AB_kode2:Tm 0.070 -0.387 -0.702 0.003 -0.005 -0.002
Parametre estimates
Parametre estimates using “parameters” package. An approximation of Stata results and p-values (two-tailed) computed using a Wald t-distribution approximation.
parameters::model_parameters(sap_rs_model)# Fixed Effects
Parameter | Coefficient | SE | 95% CI | t(1132) | p
------------------------------------------------------------------------------
(Intercept) | 1.04 | 0.56 | [-0.05, 2.14] | 1.87 | 0.061
AB kode [2] | -0.35 | 0.20 | [-0.74, 0.04] | -1.76 | 0.079
Time | -8.59e-03 | 0.03 | [-0.06, 0.04] | -0.34 | 0.737
Leg 0 | 0.85 | 0.06 | [ 0.72, 0.98] | 13.12 | < .001
Age | -2.29e-03 | 8.23e-03 | [-0.02, 0.01] | -0.28 | 0.781
Sex [2] | -0.29 | 0.19 | [-0.66, 0.07] | -1.57 | 0.118
AB kode [2] × Time | -0.05 | 0.04 | [-0.13, 0.02] | -1.47 | 0.142
# Random Effects
Parameter | Coefficient
--------------------------------------
SD (Intercept: ID) | 0.80
SD (Time: ID) | 0.16
Cor (Intercept~Time: ID) | -0.08
SD (Residual) | 1.02
Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
using a Wald t-distribution approximation.
P-values etc from the parameters package (robust)
parameters::model_parameters(sap_rs_model, bootstrap = TRUE, ci_method = "bcai", iterations = 5000)Bootstrapping only returns fixed effects of the mixed model.
# Fixed Effects
Parameter | Coefficient | 95% CI | p
---------------------------------------------------------
(Intercept) | 1.04 | [-0.07, 2.14] | 0.064
AB kode [2] | -0.35 | [-0.72, 0.05] | 0.084
Time | -9.17e-03 | [-0.06, 0.04] | 0.718
Leg 0 | 0.85 | [ 0.72, 0.98] | < .001
Age | -2.34e-03 | [-0.02, 0.01] | 0.770
Sex [2] | -0.29 | [-0.66, 0.07] | 0.115
AB kode [2] × Time | -0.05 | [-0.12, 0.02] | 0.142
Uncertainty intervals (equal-tailed) are Wald intervals.
SAP with random intercept + random slope - WITHOUT interaction
sap_rs_model_no_int <- lmer(Leg ~ 1 + AB_kode + Time +
Leg_0 + Age + Sex +
(1 + Time | ID),
REML = F,
control = lmerControl(optimizer = "Nelder_Mead"),
data = lmer_long_sav)
summary(sap_rs_model_no_int)Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + Leg_0 + Age + Sex + (1 + Time | ID)
Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")
AIC BIC logLik deviance df.resid
3695.2 3745.6 -1837.6 3675.2 1133
Scaled residuals:
Min 1Q Median 3Q Max
-5.4246 -0.4463 0.0152 0.4930 4.5899
Random effects:
Groups Name Variance Std.Dev. Corr
ID (Intercept) 0.64969 0.8060
Time 0.02665 0.1632 -0.09
Residual 1.03454 1.0171
Number of obs: 1143, groups: ID, 120
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 1.100222 0.555772 120.812080 1.980 0.0500 .
AB_kode2 -0.461157 0.182792 120.138830 -2.523 0.0129 *
Time -0.034926 0.018390 120.174137 -1.899 0.0599 .
Leg_0 0.850093 0.064781 119.957596 13.123 <0.0000000000000002 ***
Age -0.002346 0.008229 120.625898 -0.285 0.7761
Sex2 -0.291888 0.186074 119.875323 -1.569 0.1194
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) AB_kd2 Time Leg_0 Age
AB_kode2 -0.053
Time -0.066 0.004
Leg_0 -0.768 -0.046 -0.009
Age -0.613 -0.135 0.002 0.050
Sex2 0.029 0.060 0.005 -0.121 -0.146
Comparing the models
anova(sap_model, sap_rs_model, sap_rs_model_no_int)Data: lmer_long_sav
Models:
sap_model: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex + (1 | ID)
sap_rs_model_no_int: Leg ~ 1 + AB_kode + Time + Leg_0 + Age + Sex + (1 + Time | ID)
sap_rs_model: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex + (1 + Time | ID)
npar AIC BIC logLik deviance Chisq Df
sap_model 9 3818.8 3864.2 -1900.4 3800.8
sap_rs_model_no_int 10 3695.2 3745.6 -1837.6 3675.2 125.6784 1
sap_rs_model 11 3695.0 3750.5 -1836.5 3673.0 2.1395 1
Pr(>Chisq)
sap_model
sap_rs_model_no_int <0.0000000000000002 ***
sap_rs_model 0.1436
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::compare_performance(sap_model, sap_rs_model, sap_rs_model_no_int, rank = T)Some of the nested models seem to be identical and probably only vary in
their random effects.
# Comparison of Model Performance Indices
Name | Model | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma | AIC weights | AICc weights | BIC weights | Performance-Score
------------------------------------------------------------------------------------------------------------------------------------------------------
sap_rs_model_no_int | lmerModLmerTest | 0.742 | 0.357 | 0.598 | 0.940 | 1.017 | 0.483 | 0.487 | 0.921 | 85.64%
sap_rs_model | lmerModLmerTest | 0.744 | 0.365 | 0.596 | 0.940 | 1.017 | 0.517 | 0.513 | 0.079 | 82.99%
sap_model | lmerModLmerTest | 0.687 | 0.371 | 0.502 | 1.073 | 1.128 | 6.72e-28 | 6.91e-28 | 1.59e-26 | 12.50%
The model without the interaction performs the best, but no extreme difference. As it is prespec. in the SAP, the interaction is kept.
Model assumptions (sap_rs_model)
resid <- residuals(sap_rs_model)
hist(resid, main = "Residual Histogram", breaks = 50 )check_normality(sap_rs_model)Warning: Non-normality of residuals detected (p < .001).
Shapiro-Wilk normality test
shapiro.test(resid)
Shapiro-Wilk normality test
data: resid
W = 0.96634, p-value = 0.000000000000001284
plot(check_normality(sap_rs_model), type = "qq")plot(check_normality(sap_rs_model), type = "pp")plot(sap_rs_model, show_residuals = TRUE, show_residuals_line = TRUE)Outliers
check_outliers(sap_rs_model)1 outlier detected: case 740.
- Based on the following method and threshold: cook (0.9).
- For variable: (Whole model).
plot(check_outliers(sap_rs_model), type = "dots")Posterior predictive checks “Posterior predictive checks mean”simulating replicated data under the fitted model and then comparing these to the observed data” (Gelman and Hill, 2007, p. 158). Posterior predictive checks can be used to “look for systematic discrepancies between real and simulated data” (Gelman et al. 2014, p. 169).”
check_range … if TRUE, includes a plot with the minimum value of the original response against the minimum values of the replicated responses, and the same for the maximum value. This plot helps judging whether the variation in the original data is captured by the model or not (Gelman et al. 2020, pp.163). The minimum and maximum values of y should be inside the range of the related minimum and maximum values of yrep.
check_posterior_predictions(sap_rs_model, check_range = TRUE, iterations = 50)Warning: Failed to compute posterior predictive checks with `re_formula=NULL`.
Trying again with `re_formula=NA` now.
Binned residuals “Binned residual plots are achieved by”dividing the data into categories (bins) based on their fitted values, and then plotting the average residual versus the average fitted value for each bin.” (Gelman, Hill 2007: 97). If the model were true, one would expect about 95% of the residuals to fall inside the error bounds.”
plot(binned_residuals(sap_rs_model))Warning: Removed 29 rows containing missing values (`geom_point()`).
Transformation
Jeg har forsøkt å sjekke effekten av å normalisere outcome, men uten større hell. Kjørt en normaliserings sjekk og resultatet forbedres noe med sqrt transformering, men residualene er fortsatt ikke normal fordelte.
bestNormalize::bestNormalize(lmer_long_sav$Leg)Best Normalizing transformation with 1143 Observations
Estimated Normality Statistics (Pearson P / df, lower => more normal):
- arcsinh(x): 11.6549
- Center+scale: 11.4934
- Double Reversed Log_b(x+a): 11.6908
- Exp(x): 40.7672
- Log_b(x+a): 12.4506
- orderNorm (ORQ): 11.4988
- sqrt(x + a): 11.3853
- Yeo-Johnson: 11.5593
Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
Based off these, bestNormalize chose:
Standardized sqrt(x + a) Transformation with 1143 nonmissing obs.:
Relevant statistics:
- a = 0
- mean (before standardization) = 2.402077
- sd (before standardization) = 0.4593078
Marginal means
emmeans package
Model with interaction
# Model with interaction
emmeans::emmeans(sap_rs_model, specs = "AB_kode")NOTE: Results may be misleading due to involvement in interactions
AB_kode emmean SE df lower.CL upper.CL
1 6.29 0.157 124 5.98 6.60
2 5.65 0.160 125 5.33 5.97
Results are averaged over the levels of: Sex
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
Model without interaction
# Model without interaction
emmeans::emmeans(sap_rs_model_no_int, specs = "AB_kode") AB_kode emmean SE df lower.CL upper.CL
1 6.20 0.145 146 5.92 6.49
2 5.74 0.148 147 5.45 6.03
Results are averaged over the levels of: Sex
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
Model predictions
marginaleffects package
Avg. predictions AB_kode and time
Code
sap_rs_model_avg_preds <- marginaleffects::avg_predictions(sap_rs_model, by = c("AB_kode", "Time"))
sap_rs_model_avg_preds |>
ggplot(aes(x = Time, y = estimate, colour = AB_kode, fill = AB_kode))+
geom_line() +
geom_ribbon(colour = NA, alpha=0.1, aes(ymin = conf.low, ymax = conf.high)) +
scale_y_continuous(breaks = 0:10, limits = c(0, 10)) +
scale_x_continuous(breaks = 1:10) +
theme_classic() +
labs(x = "Day", y = "Predicted pain\nNRS (0–10)\nMean, 95% CI") +
theme(axis.title.y = element_text(angle = 0, vjust = 0.5))Robust lmer model: sap_rs_model
Field, Andy P, and Rand R Wilcox. 2017. ‘Robust Statistical Methods: A Primer for Clinical Psychology and Experimental Psychopathology Researchers’. Behaviour Research and Therapy 98 (November): 19–38. https://doi.org/10.1016/j.brat.2017.05.013.
robust_rs_model <- rlmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time +
Leg_0 + Age + Sex +
(1 + Time | ID),
REML = F,
data = lmer_long_sav)
summary(robust_rs_model)Robust linear mixed model fit by DAStau
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex + (1 + Time | ID)
Data: lmer_long_sav
Scaled residuals:
Min 1Q Median 3Q Max
-7.8909 -0.5444 0.0039 0.5290 6.8259
Random effects:
Groups Name Variance Std.Dev. Corr
ID (Intercept) 0.75243 0.8674
Time 0.02339 0.1529 0.00
Residual 0.67843 0.8237
Number of obs: 1143, groups: ID, 120
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.697127652 0.574379804 1.214
AB_kode2 -0.370082216 0.195811033 -1.890
Time -0.000001916 0.023522283 0.000
Leg_0 0.882188177 0.066995233 13.168
Age 0.002158565 0.008505458 0.254
Sex2 -0.305124613 0.192396251 -1.586
AB_kode2:Time -0.071621846 0.033543694 -2.135
Correlation of Fixed Effects:
(Intr) AB_kd2 Time Leg_0 Age Sex2
AB_kode2 -0.063
Time -0.060 0.187
Leg_0 -0.769 -0.045 -0.010
Age -0.613 -0.129 0.004 0.050
Sex2 0.030 0.059 0.005 -0.121 -0.147
AB_kode2:Tm 0.043 -0.263 -0.701 0.004 -0.003 -0.002
Robustness weights for the residuals:
922 weights are ~= 1. The remaining 221 ones are summarized as
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.170 0.556 0.737 0.716 0.918 0.999
Robustness weights for the random effects:
226 weights are ~= 1. The remaining 14 ones are summarized as
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.337 0.536 0.851 0.750 0.976 0.987
Rho functions used for fitting:
Residuals:
eff: smoothed Huber (k = 1.345, s = 10)
sig: smoothed Huber, Proposal 2 (k = 1.345, s = 10)
Random Effects, variance component 1 (ID):
eff: smoothed Huber (k = 5.14, s = 10)
vcp: smoothed Huber (k = 5.14, s = 10)
Est. marginal means (robust)
emmeans::emmeans(robust_rs_model, specs = "AB_kode")NOTE: Results may be misleading due to involvement in interactions
AB_kode emmean SE df asymp.LCL asymp.UCL
1 6.38 0.161 Inf 6.07 6.70
2 5.62 0.165 Inf 5.30 5.94
Results are averaged over the levels of: Sex
Confidence level used: 0.95
With interaction
emmeans::emmeans(robust_rs_model, specs = c("AB_kode", "Time")) AB_kode Time emmean SE df asymp.LCL asymp.UCL
1 5.47 6.38 0.161 Inf 6.07 6.70
2 5.47 5.62 0.165 Inf 5.30 5.94
Results are averaged over the levels of: Sex
Confidence level used: 0.95