Reading in the data

Combined with a ggplot and filter to view the data

library(lme4)
## Warning: package 'lme4' was built under R version 4.0.5
## Loading required package: Matrix
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
orthodont = read.csv("orthodont.csv")
str(orthodont)
## 'data.frame':    108 obs. of  4 variables:
##  $ distance: num  26 25 29 31 21.5 22.5 23 26.5 23 22.5 ...
##  $ age     : int  8 10 12 14 8 10 12 14 8 10 ...
##  $ Subject : chr  "M01" "M01" "M01" "M01" ...
##  $ Sex     : chr  "Male" "Male" "Male" "Male" ...
ggplot(orthodont, aes(x = age, y = distance, col = Subject)) + geom_point()

orthodont %>% filter(Subject == "M10")
##   distance age Subject  Sex
## 1     27.5   8     M10 Male
## 2     28.0  10     M10 Male
## 3     31.0  12     M10 Male
## 4     31.5  14     M10 Male

Linear Modeling

lm1 = lm(distance ~ age, data = orthodont)
summary(lm1)
## 
## Call:
## lm(formula = distance ~ age, data = orthodont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5037 -1.5778 -0.1833  1.3519  6.3167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.7611     1.2256  13.676  < 2e-16 ***
## age           0.6602     0.1092   6.047 2.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared:  0.2565, Adjusted R-squared:  0.2495 
## F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08
orthodont$age2 = orthodont$age - 8
lm2 = lm(distance ~ age2, data = orthodont)
summary(lm2)
## 
## Call:
## lm(formula = distance ~ age2, data = orthodont)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5037 -1.5778 -0.1833  1.3519  6.3167 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.0426     0.4085  53.957  < 2e-16 ***
## age2          0.6602     0.1092   6.047 2.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared:  0.2565, Adjusted R-squared:  0.2495 
## F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08
anova(lm2)
## Analysis of Variance Table
## 
## Response: distance
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## age2        1 235.36 235.356  36.562 2.248e-08 ***
## Residuals 106 682.34   6.437                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pl = ggplot(data=orthodont, aes(x=age2,y=distance)) + geom_point() 
pl + geom_line(aes(x=age2,y=predict(lm1)))

pl + geom_line(aes(x=age2,y=predict(lm1))) + facet_wrap(~Sex)

subject.select = c(paste0("M0",5:8),paste0("F0",2:5))
orthodont.select <- orthodont %>% 
  filter(Subject %in% subject.select)
ggplot(orthodont.select, aes(x=age2,y=distance)) + geom_point() + 
  geom_line(aes(x=age2,y=predict(lm1, newdata=orthodont.select))) + facet_wrap(~Subject, nrow=2)

ggplot(data=orthodont, aes(x=age2,y=distance)) + geom_point(size=3)  + 
  geom_line(aes(x=age2,y=distance,group=Subject)) + facet_wrap(~Sex)

Random intercept model

lme1 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = FALSE) 
summary(lme1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
##    Data: orthodont
## 
##      AIC      BIC   logLik deviance df.resid 
##    451.4    462.1   -221.7    443.4      104 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6870 -0.5386 -0.0123  0.4910  3.7470 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 4.294    2.072   
##  Residual             2.024    1.423   
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.45990   47.93
## age2         0.66019    0.06122   10.78
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.399
ranef(lme1)
## $Subject
##     (Intercept)
## F01  -2.3689570
## F02  -0.9152788
## F03  -0.2443505
## F04   0.7620421
## F05  -1.2507430
## F06  -2.5925998
## F07  -0.9152788
## F08  -0.5798146
## F09  -2.5925998
## F10  -4.9408491
## F11   2.1038989
## M01   3.3339342
## M02  -0.5798146
## M03   0.2029351
## M04   2.3275417
## M05  -0.9152788
## M06   2.1038989
## M07  -0.2443505
## M08  -0.1325291
## M09   0.9856849
## M10   4.8994338
## M11  -0.3561719
## M12   0.2029351
## M13   0.2029351
## M14   0.7620421
## M15   1.6566133
## M16  -0.9152788
## 
## with conditional variances for "Subject"
orthodont$random_intercept = fitted(lme1)
ggplot(data=orthodont, aes(x=age,y=distance)) + geom_point() + 
  geom_line(aes(x=age,y=random_intercept)) + facet_wrap(~Subject, ncol=5) 

Random slope model

lme2 = lmer(distance ~ age2 + (age2 | Subject), data = orthodont, REML = FALSE) 
summary(lme2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: distance ~ age2 + (age2 | Subject)
##    Data: orthodont
## 
##      AIC      BIC   logLik deviance df.resid 
##    451.2    467.3   -219.6    439.2      102 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3059 -0.4874  0.0076  0.4822  3.9228 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 3.3831   1.8393       
##           age2        0.0462   0.2149   0.24
##  Residual             1.7162   1.3100       
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.41206  53.493
## age2         0.66019    0.06992   9.442
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.208
ranef(lme2)
## $Subject
##     (Intercept)         age2
## F01 -1.91620729 -0.174100410
## F02 -0.91340410  0.004864587
## F03 -0.34940342  0.045295126
## F04  0.80808992 -0.023894243
## F05 -0.84930187 -0.159611991
## F06 -2.11574426 -0.182768546
## F07 -0.74035281 -0.067266067
## F08 -0.18147042 -0.162459845
## F09 -2.04652375 -0.211620807
## F10 -4.26279794 -0.252144778
## F11  1.86687077  0.085819096
## M01  2.77396775  0.212837564
## M02 -0.59679350  0.010653726
## M03  0.11889106  0.033779136
## M04  2.41251032 -0.049774076
## M05 -0.94801435  0.019290718
## M06  1.86687077  0.085819096
## M07 -0.31479316  0.030868995
## M08  0.07916251 -0.087419049
## M09  0.66152433  0.129035201
## M10  4.30916763  0.215809993
## M11 -0.08576421 -0.110513316
## M12 -0.05416023  0.105909791
## M13 -0.71175510  0.380006277
## M14  0.77347966 -0.009468113
## M15  1.15630450  0.198318002
## M16 -0.74035281 -0.067266067
## 
## with conditional variances for "Subject"
plot(ranef(lme2)$Subject)

orthodont$random_slope = fitted(lme2)
ggplot(data=orthodont, aes(x=age,y=distance)) + geom_point() + 
  geom_line(aes(x=age,y=random_slope)) + facet_wrap(~Subject, ncol=5) 

Mixed-effect model comparison

AIC(lm2, lme1, lme2)
##      df      AIC
## lm2   3 511.5770
## lme1  4 451.3895
## lme2  6 451.2116
lme3 = lmer(distance ~ age2 + (1 | Subject), data = orthodont, REML = TRUE) 
summary(lme3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age2 + (1 | Subject)
##    Data: orthodont
## 
## REML criterion at convergence: 447
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6645 -0.5351 -0.0129  0.4874  3.7218 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 4.472    2.115   
##  Residual             2.049    1.432   
## Number of obs: 108, groups:  Subject, 27
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 22.04259    0.46772   47.13
## age2         0.66019    0.06161   10.72
## 
## Correlation of Fixed Effects:
##      (Intr)
## age2 -0.395