LMM-analyse

Packages

library(haven)
library(lmerTest)
library(tidyverse)
library(robustlmm)
library(emmeans)
library(easystats)

options(scipen = 999)

Import SAV file

lmer_long_sav <- read_sav("/Volumes/datafiler/long_data.sav")

# Remove SPSS noise :D
lmer_long_sav$AB_kode <- as.factor(lmer_long_sav$AB_kode)
lmer_long_sav$Sex <- as.factor(lmer_long_sav$Sex)
lmer_long_sav$PP_L <- as.factor(lmer_long_sav$PP_L)
lmer_long_sav$ITT_L <- as.factor(lmer_long_sav$ITT_L)

Graphs raw data

Pain over time pr. patient

@fig_all_pain_courses shows a graphical overview of all patients development

Code

lmer_long_sav |> 
  arrange(ID, Time) |> 
ggplot(aes(x = Time, y = Leg, group = ID, color = AB_kode)) +
  geom_line() +
  facet_wrap(~ ID) +
  labs(x = "", y = "", title = "") +
  theme_minimal() +
   theme(axis.text.x = element_blank(),  # remove x axis labels
        axis.ticks.x = element_blank(),  # remove x axis ticks
        axis.text.y = element_blank(),   # remove y axis labels
        axis.ticks.y = element_blank()   # remove y axis ticks
        )

Pain - time - groups

@fig_pain_course_groups shows a graphical overview of the groups development for pain

Code

lmer_long_sav |> 
  group_by(Time, AB_kode) |> 
  summarise(mean_pain = mean(Leg, na.rm = T),
            sd = sd(Leg, na.rm = T),
            n = sum(!is.na(Leg))) |> 
  mutate(se = sd / sqrt(n),
         lower.ci = mean_pain - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean_pain + qt(1 - (0.05 / 2), n - 1) * se) |> 
  ggplot(aes(x = Time, y = mean_pain, color = AB_kode)) +
    geom_line(size = 1) +
    geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci), alpha = 0.08, size = 0) +
  scale_y_continuous(breaks = 0:10, limits = c(0, 10)) +
  scale_x_continuous(breaks = 1:10) +
  theme_classic() +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) +
  labs(
    x = "Days",
    y = "Mean pain Intensity",
    title = "Mean pain over time (95% CI) - Raw scores",
    color = "Group"
  )

Pain DIFFERENCE groups data

Code

lmer_long_sav %>%
  group_by(Time, AB_kode) %>%
  summarise(mean_pain = mean(Leg, na.rm = TRUE)) %>%
  pivot_wider(names_from = AB_kode, values_from = mean_pain) %>%
  rename(Group_One = "1",
         Group_Two = "2") |> 
  summarise(difference = Group_Two - Group_One) |> 
   ggplot(aes(x = Time, y = difference)) +
    geom_line(size = 1) +
  theme_classic() +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5)) +
  scale_x_continuous(breaks = 1:10) +
  scale_y_continuous(limits = c(-1, 0)) +
  labs(
    x = "Days",
    y = "Mean pain difference \n(B - A)",
    title = "",
    color = ""
  )

Linear mixed models

Nelder_Mead optimizer was chosen as this was the only working for the latter complex models

SAP model with random intercept only (no random slope for time)

sap_model <- lmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time + 
                    Leg_0 + Age + Sex + 
                    (1 | ID),
                    REML = F,
                    control = lmerControl(optimizer = "Nelder_Mead"),
                    data = lmer_long_sav)

summary(sap_model)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex +  
    (1 | ID)
   Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")

     AIC      BIC   logLik deviance df.resid 
  3818.8   3864.2  -1900.4   3800.8     1134 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.6351 -0.5413  0.0141  0.5306  4.0320 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 1.282    1.132   
 Residual             1.272    1.128   
Number of obs: 1143, groups:  ID, 120

Fixed effects:
                 Estimate  Std. Error          df t value            Pr(>|t|)
(Intercept)      1.439382    0.675307  126.246998   2.131              0.0350
AB_kode2        -0.328351    0.254104  212.707701  -1.292              0.1977
Time            -0.011181    0.016462 1027.418997  -0.679              0.4972
Leg_0            0.850340    0.078110  121.722733  10.886 <0.0000000000000002
Age             -0.010597    0.009899  121.281680  -1.070              0.2865
Sex2            -0.401505    0.223855  120.628453  -1.794              0.0754
AB_kode2:Time   -0.053907    0.023405 1028.006142  -2.303              0.0215
                 
(Intercept)   *  
AB_kode2         
Time             
Leg_0         ***
Age              
Sex2          .  
AB_kode2:Time *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) AB_kd2 Time   Leg_0  Age    Sex2  
AB_kode2    -0.093                                   
Time        -0.139  0.353                            
Leg_0       -0.766 -0.036  0.008                     
Age         -0.608 -0.118  0.000  0.052              
Sex2         0.028  0.052 -0.003 -0.117 -0.148       
AB_kode2:Tm  0.103 -0.500 -0.703 -0.007 -0.007 -0.001

SAP with random intercept + random slope

sap_rs_model <- lmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time + 
                                Leg_0 + Age + Sex + 
                                (1 + Time | ID),
                    REML = F,
                    control = lmerControl(optimizer = "Nelder_Mead"),
                    data = lmer_long_sav)

summary(sap_rs_model)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex +  
    (1 + Time | ID)
   Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")

     AIC      BIC   logLik deviance df.resid 
  3695.0   3750.5  -1836.5   3673.0     1132 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.4713 -0.4498  0.0131  0.4872  4.5935 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 ID       (Intercept) 0.64615  0.8038        
          Time        0.02594  0.1611   -0.08
 Residual             1.03455  1.0171        
Number of obs: 1143, groups:  ID, 120

Fixed effects:
                Estimate Std. Error         df t value            Pr(>|t|)    
(Intercept)     1.043189   0.557020 121.835848   1.873              0.0635 .  
AB_kode2       -0.348347   0.198224 119.555415  -1.757              0.0814 .  
Time           -0.008587   0.025576 121.053456  -0.336              0.7376    
Leg_0           0.849844   0.064770 119.974237  13.121 <0.0000000000000002 ***
Age            -0.002285   0.008228 120.646799  -0.278              0.7817    
Sex2           -0.291369   0.186044 119.892161  -1.566              0.1200    
AB_kode2:Time  -0.053564   0.036455 120.113414  -1.469              0.1444    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) AB_kd2 Time   Leg_0  Age    Sex2  
AB_kode2    -0.075                                   
Time        -0.095  0.274                            
Leg_0       -0.766 -0.043 -0.009                     
Age         -0.612 -0.122  0.005  0.050              
Sex2         0.029  0.056  0.005 -0.121 -0.146       
AB_kode2:Tm  0.070 -0.387 -0.702  0.003 -0.005 -0.002

Parametre estimates

Parametre estimates using “parameters” package. An approximation of Stata results and p-values (two-tailed) computed using a Wald t-distribution approximation.

parameters::model_parameters(sap_rs_model)

# Fixed Effects

Parameter          | Coefficient |       SE |        95% CI | t(1132) |      p
------------------------------------------------------------------------------
(Intercept)        |        1.04 |     0.56 | [-0.05, 2.14] |    1.87 | 0.061 
AB kode [2]        |       -0.35 |     0.20 | [-0.74, 0.04] |   -1.76 | 0.079 
Time               |   -8.59e-03 |     0.03 | [-0.06, 0.04] |   -0.34 | 0.737 
Leg 0              |        0.85 |     0.06 | [ 0.72, 0.98] |   13.12 | < .001
Age                |   -2.29e-03 | 8.23e-03 | [-0.02, 0.01] |   -0.28 | 0.781 
Sex [2]            |       -0.29 |     0.19 | [-0.66, 0.07] |   -1.57 | 0.118 
AB kode [2] × Time |       -0.05 |     0.04 | [-0.13, 0.02] |   -1.47 | 0.142 

# Random Effects

Parameter                | Coefficient
--------------------------------------
SD (Intercept: ID)       |        0.80
SD (Time: ID)            |        0.16
Cor (Intercept~Time: ID) |       -0.08
SD (Residual)            |        1.02


Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
  using a Wald t-distribution approximation.

P-values etc from the parameters package (robust)

parameters::model_parameters(sap_rs_model, bootstrap = TRUE, ci_method = "bcai", iterations = 5000)

Bootstrapping only returns fixed effects of the mixed model.

# Fixed Effects

Parameter          | Coefficient |        95% CI |      p
---------------------------------------------------------
(Intercept)        |        1.04 | [-0.07, 2.14] | 0.064 
AB kode [2]        |       -0.35 | [-0.72, 0.05] | 0.084 
Time               |   -9.17e-03 | [-0.06, 0.04] | 0.718 
Leg 0              |        0.85 | [ 0.72, 0.98] | < .001
Age                |   -2.34e-03 | [-0.02, 0.01] | 0.770 
Sex [2]            |       -0.29 | [-0.66, 0.07] | 0.115 
AB kode [2] × Time |       -0.05 | [-0.12, 0.02] | 0.142


Uncertainty intervals (equal-tailed) are Wald intervals.

SAP with random intercept + random slope - WITHOUT interaction

sap_rs_model_no_int <- lmer(Leg ~ 1 + AB_kode + Time + 
                                Leg_0 + Age + Sex + 
                                (1 + Time | ID),
                    REML = F,
                    control = lmerControl(optimizer = "Nelder_Mead"),
                    data = lmer_long_sav)

summary(sap_rs_model_no_int)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: Leg ~ 1 + AB_kode + Time + Leg_0 + Age + Sex + (1 + Time | ID)
   Data: lmer_long_sav
Control: lmerControl(optimizer = "Nelder_Mead")

     AIC      BIC   logLik deviance df.resid 
  3695.2   3745.6  -1837.6   3675.2     1133 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.4246 -0.4463  0.0152  0.4930  4.5899 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 ID       (Intercept) 0.64969  0.8060        
          Time        0.02665  0.1632   -0.09
 Residual             1.03454  1.0171        
Number of obs: 1143, groups:  ID, 120

Fixed effects:
              Estimate Std. Error         df t value            Pr(>|t|)    
(Intercept)   1.100222   0.555772 120.812080   1.980              0.0500 .  
AB_kode2     -0.461157   0.182792 120.138830  -2.523              0.0129 *  
Time         -0.034926   0.018390 120.174137  -1.899              0.0599 .  
Leg_0         0.850093   0.064781 119.957596  13.123 <0.0000000000000002 ***
Age          -0.002346   0.008229 120.625898  -0.285              0.7761    
Sex2         -0.291888   0.186074 119.875323  -1.569              0.1194    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
         (Intr) AB_kd2 Time   Leg_0  Age   
AB_kode2 -0.053                            
Time     -0.066  0.004                     
Leg_0    -0.768 -0.046 -0.009              
Age      -0.613 -0.135  0.002  0.050       
Sex2      0.029  0.060  0.005 -0.121 -0.146

Comparing the models

anova(sap_model, sap_rs_model, sap_rs_model_no_int)

Data: lmer_long_sav
Models:
sap_model: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex + (1 | ID)
sap_rs_model_no_int: Leg ~ 1 + AB_kode + Time + Leg_0 + Age + Sex + (1 + Time | ID)
sap_rs_model: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex + (1 + Time | ID)
                    npar    AIC    BIC  logLik deviance    Chisq Df
sap_model              9 3818.8 3864.2 -1900.4   3800.8            
sap_rs_model_no_int   10 3695.2 3745.6 -1837.6   3675.2 125.6784  1
sap_rs_model          11 3695.0 3750.5 -1836.5   3673.0   2.1395  1
                             Pr(>Chisq)    
sap_model                                  
sap_rs_model_no_int <0.0000000000000002 ***
sap_rs_model                     0.1436    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

performance::compare_performance(sap_model, sap_rs_model, sap_rs_model_no_int, rank = T)

Some of the nested models seem to be identical and probably only vary in
  their random effects.

# Comparison of Model Performance Indices

Name                |           Model | R2 (cond.) | R2 (marg.) |   ICC |  RMSE | Sigma | AIC weights | AICc weights | BIC weights | Performance-Score
------------------------------------------------------------------------------------------------------------------------------------------------------
sap_rs_model_no_int | lmerModLmerTest |      0.742 |      0.357 | 0.598 | 0.940 | 1.017 |       0.483 |        0.487 |       0.921 |            85.64%
sap_rs_model        | lmerModLmerTest |      0.744 |      0.365 | 0.596 | 0.940 | 1.017 |       0.517 |        0.513 |       0.079 |            82.99%
sap_model           | lmerModLmerTest |      0.687 |      0.371 | 0.502 | 1.073 | 1.128 |    6.72e-28 |     6.91e-28 |    1.59e-26 |            12.50%

The model without the interaction performs the best, but no extreme difference. As it is prespec. in the SAP, the interaction is kept.

Model assumptions (sap_rs_model)

resid <- residuals(sap_rs_model)
hist(resid, main = "Residual Histogram", breaks = 50 )

check_normality(sap_rs_model)

Warning: Non-normality of residuals detected (p < .001).

Shapiro-Wilk normality test

shapiro.test(resid)


    Shapiro-Wilk normality test

data:  resid
W = 0.96634, p-value = 0.000000000000001284

plot(check_normality(sap_rs_model), type = "qq")

plot(check_normality(sap_rs_model), type = "pp")

plot(sap_rs_model, show_residuals = TRUE, show_residuals_line = TRUE)

Outliers

check_outliers(sap_rs_model)

1 outlier detected: case 740.
- Based on the following method and threshold: cook (0.9).
- For variable: (Whole model).

plot(check_outliers(sap_rs_model), type = "dots")

Posterior predictive checks “Posterior predictive checks mean”simulating replicated data under the fitted model and then comparing these to the observed data” (Gelman and Hill, 2007, p. 158). Posterior predictive checks can be used to “look for systematic discrepancies between real and simulated data” (Gelman et al. 2014, p. 169).”

check_range … if TRUE, includes a plot with the minimum value of the original response against the minimum values of the replicated responses, and the same for the maximum value. This plot helps judging whether the variation in the original data is captured by the model or not (Gelman et al. 2020, pp.163). The minimum and maximum values of y should be inside the range of the related minimum and maximum values of yrep.

check_posterior_predictions(sap_rs_model, check_range = TRUE, iterations = 50)

Warning: Failed to compute posterior predictive checks with `re_formula=NULL`.
  Trying again with `re_formula=NA` now.

Binned residuals “Binned residual plots are achieved by”dividing the data into categories (bins) based on their fitted values, and then plotting the average residual versus the average fitted value for each bin.” (Gelman, Hill 2007: 97). If the model were true, one would expect about 95% of the residuals to fall inside the error bounds.”

plot(binned_residuals(sap_rs_model))

Warning: Removed 29 rows containing missing values (`geom_point()`).

Transformation

Jeg har forsøkt å sjekke effekten av å normalisere outcome, men uten større hell. Kjørt en normaliserings sjekk og resultatet forbedres noe med sqrt transformering, men residualene er fortsatt ikke normal fordelte.

bestNormalize::bestNormalize(lmer_long_sav$Leg)

Best Normalizing transformation with 1143 Observations
 Estimated Normality Statistics (Pearson P / df, lower => more normal):
 - arcsinh(x): 11.6549
 - Center+scale: 11.4934
 - Double Reversed Log_b(x+a): 11.6908
 - Exp(x): 40.7672
 - Log_b(x+a): 12.4506
 - orderNorm (ORQ): 11.4988
 - sqrt(x + a): 11.3853
 - Yeo-Johnson: 11.5593
Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
 
Based off these, bestNormalize chose:
Standardized sqrt(x + a) Transformation with 1143 nonmissing obs.:
 Relevant statistics:
 - a = 0 
 - mean (before standardization) = 2.402077 
 - sd (before standardization) = 0.4593078

Marginal means

emmeans package

Model with interaction

# Model with interaction
emmeans::emmeans(sap_rs_model, specs = "AB_kode")

NOTE: Results may be misleading due to involvement in interactions

 AB_kode emmean    SE  df lower.CL upper.CL
 1         6.29 0.157 124     5.98     6.60
 2         5.65 0.160 125     5.33     5.97

Results are averaged over the levels of: Sex 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95

Model without interaction

# Model without interaction
emmeans::emmeans(sap_rs_model_no_int, specs = "AB_kode")

 AB_kode emmean    SE  df lower.CL upper.CL
 1         6.20 0.145 146     5.92     6.49
 2         5.74 0.148 147     5.45     6.03

Results are averaged over the levels of: Sex 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95

Model predictions

marginaleffects package

Avg. predictions AB_kode and time

Code

sap_rs_model_avg_preds <- marginaleffects::avg_predictions(sap_rs_model, by = c("AB_kode", "Time"))

sap_rs_model_avg_preds |> 
  ggplot(aes(x = Time, y = estimate, colour = AB_kode, fill = AB_kode))+
    geom_line() +
    geom_ribbon(colour = NA, alpha=0.1, aes(ymin = conf.low, ymax = conf.high)) +
  scale_y_continuous(breaks = 0:10, limits = c(0, 10)) +
  scale_x_continuous(breaks = 1:10) +
  theme_classic() +
  labs(x = "Day", y = "Predicted pain\nNRS (0–10)\nMean, 95% CI") +
  theme(axis.title.y = element_text(angle = 0, vjust = 0.5))

Robust lmer model: sap_rs_model

Field, Andy P, and Rand R Wilcox. 2017. ‘Robust Statistical Methods: A Primer for Clinical Psychology and Experimental Psychopathology Researchers’. Behaviour Research and Therapy 98 (November): 19–38. https://doi.org/10.1016/j.brat.2017.05.013.

robust_rs_model <- rlmer(Leg ~ 1 + AB_kode + Time + AB_kode:Time + 
                                Leg_0 + Age + Sex + 
                                (1 + Time | ID),
                    REML = F,
                    data = lmer_long_sav)

summary(robust_rs_model)

Robust linear mixed model fit by DAStau 
Formula: Leg ~ 1 + AB_kode + Time + AB_kode:Time + Leg_0 + Age + Sex +      (1 + Time | ID) 
   Data: lmer_long_sav 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-7.8909 -0.5444  0.0039  0.5290  6.8259 

Random effects:
 Groups   Name        Variance Std.Dev. Corr
 ID       (Intercept) 0.75243  0.8674       
          Time        0.02339  0.1529   0.00
 Residual             0.67843  0.8237       
Number of obs: 1143, groups: ID, 120

Fixed effects:
                  Estimate   Std. Error t value
(Intercept)    0.697127652  0.574379804   1.214
AB_kode2      -0.370082216  0.195811033  -1.890
Time          -0.000001916  0.023522283   0.000
Leg_0          0.882188177  0.066995233  13.168
Age            0.002158565  0.008505458   0.254
Sex2          -0.305124613  0.192396251  -1.586
AB_kode2:Time -0.071621846  0.033543694  -2.135

Correlation of Fixed Effects:
            (Intr) AB_kd2 Time   Leg_0  Age    Sex2  
AB_kode2    -0.063                                   
Time        -0.060  0.187                            
Leg_0       -0.769 -0.045 -0.010                     
Age         -0.613 -0.129  0.004  0.050              
Sex2         0.030  0.059  0.005 -0.121 -0.147       
AB_kode2:Tm  0.043 -0.263 -0.701  0.004 -0.003 -0.002

Robustness weights for the residuals: 
 922 weights are ~= 1. The remaining 221 ones are summarized as
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.170   0.556   0.737   0.716   0.918   0.999 

Robustness weights for the random effects: 
 226 weights are ~= 1. The remaining 14 ones are summarized as
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.337   0.536   0.851   0.750   0.976   0.987 

Rho functions used for fitting:
  Residuals:
    eff: smoothed Huber (k = 1.345, s = 10) 
    sig: smoothed Huber, Proposal 2 (k = 1.345, s = 10) 
  Random Effects, variance component 1 (ID):
    eff: smoothed Huber (k = 5.14, s = 10) 
    vcp: smoothed Huber (k = 5.14, s = 10)

Est. marginal means (robust)

emmeans::emmeans(robust_rs_model, specs = "AB_kode")

NOTE: Results may be misleading due to involvement in interactions

 AB_kode emmean    SE  df asymp.LCL asymp.UCL
 1         6.38 0.161 Inf      6.07      6.70
 2         5.62 0.165 Inf      5.30      5.94

Results are averaged over the levels of: Sex 
Confidence level used: 0.95

With interaction

emmeans::emmeans(robust_rs_model, specs = c("AB_kode", "Time"))

 AB_kode Time emmean    SE  df asymp.LCL asymp.UCL
 1       5.47   6.38 0.161 Inf      6.07      6.70
 2       5.47   5.62 0.165 Inf      5.30      5.94

Results are averaged over the levels of: Sex 
Confidence level used: 0.95