Module 15 Assignment

Author

MJ Kemp

Read in and prepare data

library(lme4)
Loading required package: Matrix
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
orthodont = read.csv("orthodont.csv")
str(orthodont)
'data.frame':   108 obs. of  4 variables:
 $ distance: num  26 25 29 31 21.5 22.5 23 26.5 23 22.5 ...
 $ age     : int  8 10 12 14 8 10 12 14 8 10 ...
 $ Subject : chr  "M01" "M01" "M01" "M01" ...
 $ Sex     : chr  "Male" "Male" "Male" "Male" ...

Initial plot

ggplot(orthodont, aes(x = age, y = distance, col = Subject)) + 
  geom_point() +
  theme_bw()

Filter by single Subject

orthodont |>
  filter(Subject == "M10")
  distance age Subject  Sex
1     27.5   8     M10 Male
2     28.0  10     M10 Male
3     31.0  12     M10 Male
4     31.5  14     M10 Male

Linear modeling

Create model

lm1 = lm(distance ~ age, data = orthodont)
summary(lm1)

Call:
lm(formula = distance ~ age, data = orthodont)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.5037 -1.5778 -0.1833  1.3519  6.3167 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  16.7611     1.2256  13.676  < 2e-16 ***
age           0.6602     0.1092   6.047 2.25e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.537 on 106 degrees of freedom
Multiple R-squared:  0.2565,    Adjusted R-squared:  0.2495 
F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08

Change starting value to match student at start of study

orthodont$age2 = orthodont$age - 8
lm2 = lm(distance ~ age2, data = orthodont)
summary(lm2)

Call:
lm(formula = distance ~ age2, data = orthodont)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.5037 -1.5778 -0.1833  1.3519  6.3167 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  22.0426     0.4085  53.957  < 2e-16 ***
age2          0.6602     0.1092   6.047 2.25e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.537 on 106 degrees of freedom
Multiple R-squared:  0.2565,    Adjusted R-squared:  0.2495 
F-statistic: 36.56 on 1 and 106 DF,  p-value: 2.248e-08

Determine significance

anova(lm2)
Analysis of Variance Table

Response: distance
           Df Sum Sq Mean Sq F value    Pr(>F)    
age2        1 235.36 235.356  36.562 2.248e-08 ***
Residuals 106 682.34   6.437                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot

Initial Linear Model plot

p <- ggplot(data = orthodont, aes(x = age2,y = distance)) + 
  geom_point() + 
  geom_line(aes(x = age2,y = predict(lm1)))  +
  theme_bw()
p

Plot separated by Sex

p + facet_wrap(~ Sex)

Plot separated by specified Subjects

subject.select <- c(paste0("M0", 5:8), paste0("F0", 2:5))
orthodont.select <- orthodont |> 
  filter(Subject %in% subject.select)

ggplot(orthodont.select, aes(x = age2,y = distance)) + 
  geom_point() + 
  geom_line(aes(x = age2,y = predict(lm1, newdata = orthodont.select))) + facet_wrap(~ Subject, nrow = 2) +
  theme_bw()

Plot displaying connecting line for each Subject

ggplot(data = orthodont, aes(x = age2, y = distance)) + 
  geom_point(size = 3) + 
  geom_line(aes(x = age2, y = distance, group = Subject)) + 
  facet_wrap(~ Sex) +
  theme_bw()


Mixed Effects

Initial linear mixed-effects model

lme1 = lmer(distance ~ age2 + (1 | Subject), 
            data = orthodont, REML = FALSE) 
summary(lme1)
Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: distance ~ age2 + (1 | Subject)
   Data: orthodont

      AIC       BIC    logLik -2*log(L)  df.resid 
    451.4     462.1    -221.7     443.4       104 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6870 -0.5386 -0.0123  0.4910  3.7470 

Random effects:
 Groups   Name        Variance Std.Dev.
 Subject  (Intercept) 4.294    2.072   
 Residual             2.024    1.423   
Number of obs: 108, groups:  Subject, 27

Fixed effects:
            Estimate Std. Error t value
(Intercept) 22.04259    0.45990   47.93
age2         0.66019    0.06122   10.78

Correlation of Fixed Effects:
     (Intr)
age2 -0.399
ranef(lme1)
$Subject
    (Intercept)
F01  -2.3689570
F02  -0.9152788
F03  -0.2443505
F04   0.7620421
F05  -1.2507430
F06  -2.5925998
F07  -0.9152788
F08  -0.5798146
F09  -2.5925998
F10  -4.9408491
F11   2.1038989
M01   3.3339342
M02  -0.5798146
M03   0.2029351
M04   2.3275417
M05  -0.9152788
M06   2.1038989
M07  -0.2443505
M08  -0.1325291
M09   0.9856849
M10   4.8994338
M11  -0.3561719
M12   0.2029351
M13   0.2029351
M14   0.7620421
M15   1.6566133
M16  -0.9152788

with conditional variances for "Subject" 

Plot of initial linear mixed-effects model

orthodont$random_intercept <- fitted(lme1)
ggplot(data = orthodont, aes(x = age, y = distance)) + 
  geom_point() + 
  geom_line(aes(x = age, y = random_intercept)) + 
  facet_wrap(~Subject, ncol=5) 

Random Slope Model

Creation of model fit with both random intercept and slope

lme2 = lmer(distance ~ age2 + (age2 | Subject), 
            data = orthodont, REML = FALSE) 
summary(lme2)
Linear mixed model fit by maximum likelihood  ['lmerMod']
Formula: distance ~ age2 + (age2 | Subject)
   Data: orthodont

      AIC       BIC    logLik -2*log(L)  df.resid 
    451.2     467.3    -219.6     439.2       102 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3059 -0.4874  0.0076  0.4822  3.9228 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 Subject  (Intercept) 3.3831   1.8393        
          age2        0.0462   0.2149   0.24 
 Residual             1.7162   1.3100        
Number of obs: 108, groups:  Subject, 27

Fixed effects:
            Estimate Std. Error t value
(Intercept) 22.04259    0.41206  53.493
age2         0.66019    0.06992   9.442

Correlation of Fixed Effects:
     (Intr)
age2 -0.208
ranef(lme2)
$Subject
    (Intercept)         age2
F01 -1.91620729 -0.174100410
F02 -0.91340410  0.004864587
F03 -0.34940342  0.045295126
F04  0.80808992 -0.023894243
F05 -0.84930187 -0.159611991
F06 -2.11574426 -0.182768546
F07 -0.74035281 -0.067266067
F08 -0.18147042 -0.162459845
F09 -2.04652375 -0.211620807
F10 -4.26279794 -0.252144778
F11  1.86687077  0.085819096
M01  2.77396775  0.212837564
M02 -0.59679350  0.010653726
M03  0.11889106  0.033779136
M04  2.41251032 -0.049774076
M05 -0.94801435  0.019290718
M06  1.86687077  0.085819096
M07 -0.31479316  0.030868995
M08  0.07916251 -0.087419049
M09  0.66152433  0.129035201
M10  4.30916763  0.215809993
M11 -0.08576421 -0.110513316
M12 -0.05416023  0.105909791
M13 -0.71175510  0.380006277
M14  0.77347966 -0.009468113
M15  1.15630450  0.198318002
M16 -0.74035281 -0.067266067

with conditional variances for "Subject" 

Plot to display correlation between random effects (difference from grand mean intercept and difference from grand mean slope)

plot(ranef(lme2)$Subject)

Plot separated by Subject

orthodont$random_slope <- fitted(lme2)
ggplot(data = orthodont, aes(x = age, y = distance)) + 
  geom_point() + 
  geom_line(aes(x = age, y = random_slope)) + 
  facet_wrap(~ Subject, ncol=5) 


Mixed-effect Model Comparison

AIC(lm2, lme1, lme2)
     df      AIC
lm2   3 511.5770
lme1  4 451.3895
lme2  6 451.2116

Correcting bias with restricted maximum likelihood

lme3 = lmer(distance ~ age2 + (1 | Subject), 
            data = orthodont, REML = TRUE) 
summary(lme3)
Linear mixed model fit by REML ['lmerMod']
Formula: distance ~ age2 + (1 | Subject)
   Data: orthodont

REML criterion at convergence: 447

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6645 -0.5351 -0.0129  0.4874  3.7218 

Random effects:
 Groups   Name        Variance Std.Dev.
 Subject  (Intercept) 4.472    2.115   
 Residual             2.049    1.432   
Number of obs: 108, groups:  Subject, 27

Fixed effects:
            Estimate Std. Error t value
(Intercept) 22.04259    0.46772   47.13
age2         0.66019    0.06161   10.72

Correlation of Fixed Effects:
     (Intr)
age2 -0.395