-This lab introduces mixed effects (heiarachical) modeling utilizing the lme4 package. The first step is to set my working directory and boot the package up.
setwd("~/Desktop/GEOG5680/module15")
library(lme4)
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.0.5
-For this example we will use some orthodontic meausurements on children.
orthodont = read.csv("orthodont.csv")
str(orthodont)
## 'data.frame': 108 obs. of 4 variables:
## $ distance: num 26 25 29 31 21.5 22.5 23 26.5 23 22.5 ...
## $ age : int 8 10 12 14 8 10 12 14 8 10 ...
## $ Subject : chr "M01" "M01" "M01" "M01" ...
## $ Sex : chr "Male" "Male" "Male" "Male" ...
-Now we plot distance against age
library(ggplot2)
ggplot(orthodont, aes(x = age, y = distance, col = Subject)) + geom_point()
-Lets load up dplyr to do some data manipulation
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
orthodont %>% filter(Subject == "M10")
## distance age Subject Sex
## 1 27.5 8 M10 Male
## 2 28.0 10 M10 Male
## 3 31.0 12 M10 Male
## 4 31.5 14 M10 Male
-Lets do a basic linear model of the data.
lml = lm(distance ~ age, data = orthodont)
summary(lml)
##
## Call:
## lm(formula = distance ~ age, data = orthodont)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5037 -1.5778 -0.1833 1.3519 6.3167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.7611 1.2256 13.676 < 2e-16 ***
## age 0.6602 0.1092 6.047 2.25e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared: 0.2565, Adjusted R-squared: 0.2495
## F-statistic: 36.56 on 1 and 106 DF, p-value: 2.248e-08
orthodont$age2 = orthodont$age - 8
lm2 = lm(distance ~ age2, data = orthodont)
summary(lm2)
##
## Call:
## lm(formula = distance ~ age2, data = orthodont)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5037 -1.5778 -0.1833 1.3519 6.3167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.0426 0.4085 53.957 < 2e-16 ***
## age2 0.6602 0.1092 6.047 2.25e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared: 0.2565, Adjusted R-squared: 0.2495
## F-statistic: 36.56 on 1 and 106 DF, p-value: 2.248e-08
anova(lm2)
## Analysis of Variance Table
##
## Response: distance
## Df Sum Sq Mean Sq F value Pr(>F)
## age2 1 235.36 235.356 36.562 2.248e-08 ***
## Residuals 106 682.34 6.437
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pl <- ggplot(data = orthodont, aes(x = age2, y = distance)) + geom_point()
pl + geom_line(aes(x = age2, y = predict(lml)))
- Now lets plot it by sex
pl + geom_line(aes(x = age2, y = predict(lml))) + facet_wrap(~Sex)
- Woah seems to be a slight overestimation on females. Lets see how this
relates to individual children. Here, we will subset our four boys and
four girls and plot their distance values relative to the model.
subject.select <- c(paste0("M0", 5:8), paste0("F0", 2:5))
orthodont.select <- orthodont %>%
filter(Subject %in% subject.select)
ggplot(orthodont.select, aes(x =age2, y = distance)) + geom_point() +
geom_line(aes(x = age2, y = predict(lml, newdata = orthodont.select))) + facet_wrap(~Subject, nrow = 2)
- Looks good, but a bit bias, we didnt think about how the data are
repeated meaasurements made on the same subjects. Lets do a more
convenient plot.
ggplot(data = orthodont, aes(x = age2, y = distance)) + geom_point(size = 3) +
geom_line(aes(x = age2, y = distance, group = Subject)) + facet_wrap(~Sex)
## A mixed effect model - Now lets make a mixed effect model by letting
the intercept vary, then the slope.
lme1 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = FALSE)
summary(lme1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
## Data: orthodont
##
## AIC BIC logLik deviance df.resid
## 451.4 462.1 -221.7 443.4 104
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6870 -0.5386 -0.0123 0.4910 3.7470
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 4.294 2.072
## Residual 2.024 1.423
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 22.04259 0.45990 47.93
## age2 0.66019 0.06122 10.78
##
## Correlation of Fixed Effects:
## (Intr)
## age2 -0.399
ranef(lme1)
## $Subject
## (Intercept)
## F01 -2.3689570
## F02 -0.9152788
## F03 -0.2443505
## F04 0.7620421
## F05 -1.2507430
## F06 -2.5925998
## F07 -0.9152788
## F08 -0.5798146
## F09 -2.5925998
## F10 -4.9408491
## F11 2.1038989
## M01 3.3339342
## M02 -0.5798146
## M03 0.2029351
## M04 2.3275417
## M05 -0.9152788
## M06 2.1038989
## M07 -0.2443505
## M08 -0.1325291
## M09 0.9856849
## M10 4.8994338
## M11 -0.3561719
## M12 0.2029351
## M13 0.2029351
## M14 0.7620421
## M15 1.6566133
## M16 -0.9152788
##
## with conditional variances for "Subject"
orthodont$random_intercept <- fitted(lme1)
ggplot(data = orthodont, aes(x = age, y = distance)) + geom_point() +
geom_line(aes(x = age, y = random_intercept)) + facet_wrap(~Subject, ncol = 5)
## Random Slope Model - Lets fit a model with both random intercept and
slope
lme2 = lmer(distance ~ age2 + (age2 | Subject), data = orthodont, REML = FALSE)
summary(lme2)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: distance ~ age2 + (age2 | Subject)
## Data: orthodont
##
## AIC BIC logLik deviance df.resid
## 451.2 467.3 -219.6 439.2 102
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3059 -0.4874 0.0076 0.4822 3.9228
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 3.3831 1.8393
## age2 0.0462 0.2149 0.24
## Residual 1.7162 1.3100
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 22.04259 0.41206 53.493
## age2 0.66019 0.06992 9.442
##
## Correlation of Fixed Effects:
## (Intr)
## age2 -0.208
ranef(lme2)
## $Subject
## (Intercept) age2
## F01 -1.91620729 -0.174100410
## F02 -0.91340410 0.004864587
## F03 -0.34940342 0.045295126
## F04 0.80808992 -0.023894243
## F05 -0.84930187 -0.159611991
## F06 -2.11574426 -0.182768546
## F07 -0.74035281 -0.067266067
## F08 -0.18147042 -0.162459845
## F09 -2.04652375 -0.211620807
## F10 -4.26279794 -0.252144778
## F11 1.86687077 0.085819096
## M01 2.77396775 0.212837564
## M02 -0.59679350 0.010653726
## M03 0.11889106 0.033779136
## M04 2.41251032 -0.049774076
## M05 -0.94801435 0.019290718
## M06 1.86687077 0.085819096
## M07 -0.31479316 0.030868995
## M08 0.07916251 -0.087419049
## M09 0.66152433 0.129035201
## M10 4.30916763 0.215809993
## M11 -0.08576421 -0.110513316
## M12 -0.05416023 0.105909791
## M13 -0.71175510 0.380006277
## M14 0.77347966 -0.009468113
## M15 1.15630450 0.198318002
## M16 -0.74035281 -0.067266067
##
## with conditional variances for "Subject"
plot(ranef(lme2)$Subject)
- We can also plot this for each child
orthodont$random_slope <- fitted(lme2)
ggplot(data = orthodont, aes(x = age, y = distance)) + geom_point() +
geom_line(aes(x = age, y = random_slope)) + facet_wrap(~Subject, ncol = 5)
# Mixed-effect model comparison - Lets do an AIC analysis
AIC(lm2, lme1, lme2)
## df AIC
## lm2 3 511.5770
## lme1 4 451.3895
## lme2 6 451.2116
lme3 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = TRUE)
summary(lme3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
## Data: orthodont
##
## REML criterion at convergence: 447
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6645 -0.5351 -0.0129 0.4874 3.7218
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 4.472 2.115
## Residual 2.049 1.432
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 22.04259 0.46772 47.13
## age2 0.66019 0.06161 10.72
##
## Correlation of Fixed Effects:
## (Intr)
## age2 -0.395