Mixed-effect models in R

-This lab introduces mixed effects (heiarachical) modeling utilizing the lme4 package. The first step is to set my working directory and boot the package up.

setwd("~/Desktop/GEOG5680/module15")
library(lme4)
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.0.5

Orthodontic data

-For this example we will use some orthodontic meausurements on children.

orthodont = read.csv("orthodont.csv")
str(orthodont)
## 'data.frame':    108 obs. of  4 variables:
##  $ distance: num  26 25 29 31 21.5 22.5 23 26.5 23 22.5 ...
##  $ age     : int  8 10 12 14 8 10 12 14 8 10 ...
##  $ Subject : chr  "M01" "M01" "M01" "M01" ...
##  $ Sex     : chr  "Male" "Male" "Male" "Male" ...

-Now we plot distance against age

library(ggplot2)
ggplot(orthodont, aes(x = age, y = distance, col = Subject)) + geom_point()

-Lets load up dplyr to do some data manipulation

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
orthodont %>% filter(Subject == "M10")
##   distance age Subject  Sex
## 1     27.5   8     M10 Male
## 2     28.0  10     M10 Male
## 3     31.0  12     M10 Male
## 4     31.5  14     M10 Male

A basic linear model

-Lets do a basic linear model of the data.

lml = lm(distance ~ age, data = orthodont)
summary(lml)
## 
## Call:
## lm(formula = distance ~ age, data = orthodont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5037 -1.5778 -0.1833  1.3519  6.3167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.7611     1.2256  13.676  < 2e-16 ***
## age           0.6602     0.1092   6.047 2.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared:  0.2565, Adjusted R-squared:  0.2495 
## F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08
orthodont$age2 = orthodont$age - 8
lm2 = lm(distance ~ age2, data = orthodont)
summary(lm2)
## 
## Call:
## lm(formula = distance ~ age2, data = orthodont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5037 -1.5778 -0.1833  1.3519  6.3167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.0426     0.4085  53.957  < 2e-16 ***
## age2          0.6602     0.1092   6.047 2.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared:  0.2565, Adjusted R-squared:  0.2495 
## F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08
anova(lm2)
## Analysis of Variance Table
## 
## Response: distance
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## age2        1 235.36 235.356  36.562 2.248e-08 ***
## Residuals 106 682.34   6.437                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pl <- ggplot(data = orthodont, aes(x = age2, y = distance)) + geom_point()
pl + geom_line(aes(x = age2, y = predict(lml)))

- Now lets plot it by sex

pl + geom_line(aes(x = age2, y = predict(lml))) + facet_wrap(~Sex)

- Woah seems to be a slight overestimation on females. Lets see how this relates to individual children. Here, we will subset our four boys and four girls and plot their distance values relative to the model.

subject.select <- c(paste0("M0", 5:8), paste0("F0", 2:5))
orthodont.select <- orthodont %>%
  filter(Subject %in% subject.select)
ggplot(orthodont.select, aes(x =age2, y = distance)) + geom_point() +
  geom_line(aes(x = age2, y = predict(lml, newdata = orthodont.select))) + facet_wrap(~Subject, nrow = 2)

- Looks good, but a bit bias, we didnt think about how the data are repeated meaasurements made on the same subjects. Lets do a more convenient plot.

ggplot(data = orthodont, aes(x = age2, y = distance)) + geom_point(size = 3) +
  geom_line(aes(x = age2, y = distance, group = Subject)) + facet_wrap(~Sex)

## A mixed effect model - Now lets make a mixed effect model by letting the intercept vary, then the slope.

lme1 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = FALSE)
summary(lme1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
##    Data: orthodont
## 
##      AIC      BIC   logLik deviance df.resid 
##    451.4    462.1   -221.7    443.4      104 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6870 -0.5386 -0.0123  0.4910  3.7470 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 4.294    2.072   
##  Residual             2.024    1.423   
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.45990   47.93
## age2         0.66019    0.06122   10.78
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.399
ranef(lme1)
## $Subject
##     (Intercept)
## F01  -2.3689570
## F02  -0.9152788
## F03  -0.2443505
## F04   0.7620421
## F05  -1.2507430
## F06  -2.5925998
## F07  -0.9152788
## F08  -0.5798146
## F09  -2.5925998
## F10  -4.9408491
## F11   2.1038989
## M01   3.3339342
## M02  -0.5798146
## M03   0.2029351
## M04   2.3275417
## M05  -0.9152788
## M06   2.1038989
## M07  -0.2443505
## M08  -0.1325291
## M09   0.9856849
## M10   4.8994338
## M11  -0.3561719
## M12   0.2029351
## M13   0.2029351
## M14   0.7620421
## M15   1.6566133
## M16  -0.9152788
## 
## with conditional variances for "Subject"
orthodont$random_intercept <- fitted(lme1)
ggplot(data = orthodont, aes(x = age, y = distance)) + geom_point() +
  geom_line(aes(x = age, y = random_intercept)) + facet_wrap(~Subject, ncol = 5)

## Random Slope Model - Lets fit a model with both random intercept and slope

lme2 = lmer(distance ~ age2 + (age2 | Subject), data = orthodont, REML = FALSE)
summary(lme2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: distance ~ age2 + (age2 | Subject)
##    Data: orthodont
## 
##      AIC      BIC   logLik deviance df.resid 
##    451.2    467.3   -219.6    439.2      102 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3059 -0.4874  0.0076  0.4822  3.9228 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 3.3831   1.8393       
##           age2        0.0462   0.2149   0.24
##  Residual             1.7162   1.3100       
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.41206  53.493
## age2         0.66019    0.06992   9.442
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.208
ranef(lme2)
## $Subject
##     (Intercept)         age2
## F01 -1.91620729 -0.174100410
## F02 -0.91340410  0.004864587
## F03 -0.34940342  0.045295126
## F04  0.80808992 -0.023894243
## F05 -0.84930187 -0.159611991
## F06 -2.11574426 -0.182768546
## F07 -0.74035281 -0.067266067
## F08 -0.18147042 -0.162459845
## F09 -2.04652375 -0.211620807
## F10 -4.26279794 -0.252144778
## F11  1.86687077  0.085819096
## M01  2.77396775  0.212837564
## M02 -0.59679350  0.010653726
## M03  0.11889106  0.033779136
## M04  2.41251032 -0.049774076
## M05 -0.94801435  0.019290718
## M06  1.86687077  0.085819096
## M07 -0.31479316  0.030868995
## M08  0.07916251 -0.087419049
## M09  0.66152433  0.129035201
## M10  4.30916763  0.215809993
## M11 -0.08576421 -0.110513316
## M12 -0.05416023  0.105909791
## M13 -0.71175510  0.380006277
## M14  0.77347966 -0.009468113
## M15  1.15630450  0.198318002
## M16 -0.74035281 -0.067266067
## 
## with conditional variances for "Subject"
plot(ranef(lme2)$Subject)

- We can also plot this for each child

orthodont$random_slope <- fitted(lme2)
ggplot(data = orthodont, aes(x = age, y = distance)) + geom_point() +
  geom_line(aes(x = age, y = random_slope)) + facet_wrap(~Subject, ncol = 5)

# Mixed-effect model comparison - Lets do an AIC analysis

AIC(lm2, lme1, lme2)
##      df      AIC
## lm2   3 511.5770
## lme1  4 451.3895
## lme2  6 451.2116

Restricted maximum likelihood

lme3 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = TRUE)
summary(lme3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
##    Data: orthodont
## 
## REML criterion at convergence: 447
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6645 -0.5351 -0.0129  0.4874  3.7218 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 4.472    2.115   
##  Residual             2.049    1.432   
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.46772   47.13
## age2         0.66019    0.06161   10.72
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.395