Exercises: Random coefficients models

p = c("dplyr", "magrittr", "lme4", "ggplot2","segmented", "gridExtra")
lapply(p, library, character.only = TRUE)

Q1

dat = read.table("data/reading_piat.txt", header = TRUE)

str(dat)

## 'data.frame':    267 obs. of  5 variables:
##  $ ID   : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Wave : int  1 2 3 1 2 3 1 2 3 1 ...
##  $ Age_G: num  6.5 8.5 10.5 6.5 8.5 10.5 6.5 8.5 10.5 6.5 ...
##  $ Age  : num  6 8.33 10.33 6 8.5 ...
##  $ PIAT : int  18 35 59 18 25 28 18 23 32 18 ...

head(dat,10)

##    ID Wave Age_G     Age PIAT
## 1   1    1   6.5  6.0000   18
## 2   1    2   8.5  8.3333   35
## 3   1    3  10.5 10.3333   59
## 4   2    1   6.5  6.0000   18
## 5   2    2   8.5  8.5000   25
## 6   2    3  10.5 10.5833   28
## 7   3    1   6.5  6.0833   18
## 8   3    2   8.5  8.4167   23
## 9   3    3  10.5 10.4167   32
## 10  4    1   6.5  6.0000   18

#Check missing data
sum(sapply(dat, is.na))

## [1] 0

#reset age variable
dat %<>% mutate(age_minus = Age_G-6.5)
dat$Wave %<>% as.factor

#Plot the raw data based on individual level
ggplot(dat, aes(x = age_minus, y = PIAT, group = ID))+
  geom_point(alpha = 0.5)+
  geom_line(size = 0.5)+
  labs(x = "Years after 6.5 years old", y = "PIAT Score")+
  theme_bw()

#According to the model, each subject has its own intercept and slope
summary(m1 <- lmer(PIAT~age_minus+(1+age_minus|ID), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: PIAT ~ age_minus + (1 + age_minus | ID)
##    Data: dat
## 
## REML criterion at convergence: 1819.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6539 -0.5413 -0.1497  0.3854  3.2811 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  ID       (Intercept) 11.427   3.380        
##           age_minus    4.486   2.118    0.22
##  Residual             27.043   5.200        
## Number of obs: 267, groups:  ID, 89
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  21.1629     0.6177   34.26
## age_minus     5.0309     0.2973   16.92
## 
## Correlation of Fixed Effects:
##           (Intr)
## age_minus -0.316

#residuals plot
plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")

Q2

data("mtcars", package = "datasets")
dat = mtcars
str(dat)

## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

#Take a look
ggplot(dat, aes(x = wt, y = mpg))+
  geom_point()+
  theme_bw()

#Overall lm
summary(m0 <- lm(mpg~wt, data = dat))

## 
## Call:
## lm(formula = mpg ~ wt, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5432 -2.3647 -0.1252  1.4096  6.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
## wt           -5.3445     0.5591  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10

#Try to find the possible cut points: median, mean and 3QD
summary(dat$wt)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.513   2.581   3.325   3.217   3.610   5.424

#Using median as cut point
dat %<>% mutate(wt_larger = ifelse(wt<3.325,0,wt-3.325),
                index = as.factor(ifelse(wt<3.325, "lighter", "heavier")))

ggplot(dat, aes(x = wt, y = mpg))+
  geom_point()+
  stat_smooth(aes(group = index), method = "lm")+
  theme_bw()

summary(m1 <- segmented(m0, seg.Z = ~wt, psi = 3.325))

## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.325)
## 
## Estimated Break-Point(s):
##    Est. St.Err 
##  3.595  0.346 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  42.5479     2.4221  17.566  < 2e-16 ***
## wt           -7.2840     0.8369  -8.704 1.88e-09 ***
## U1.wt         4.7917     1.5495   3.092       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.681 on 28 degrees of freedom
## Multiple R-Squared: 0.8213,  Adjusted R-squared: 0.8021 
## 
## Convergence attained in 4 iterations with relative change 2.851926e-16

anova(m0,m1)

## Analysis of Variance Table
## 
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     30 278.32                              
## 2     28 201.27  2    77.055 5.3599 0.01069 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The performance of piecewise regression model is better than overall linear regression model
Try to use 3QD as cut point:

#75%
summary(dat$wt)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.513   2.581   3.325   3.217   3.610   5.424

dat %<>% mutate(wt_75 = ifelse(wt<3.61,0,wt-3.61),
                index_75 = as.factor(ifelse(wt<3.61, "lighter", "heavier")))

ggplot(dat, aes(x = wt, y = mpg))+
  geom_point()+
  stat_smooth(aes(group = index_75), method = "lm")+
  theme_bw()

summary(m2 <- segmented(m0, seg.Z = ~wt, psi = 3.61))

## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.61)
## 
## Estimated Break-Point(s):
##    Est. St.Err 
##  3.573  0.350 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  42.6101     2.4194  17.612  < 2e-16 ***
## wt           -7.3103     0.8359  -8.745  1.7e-09 ***
## U1.wt         4.7721     1.5477   3.083       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.678 on 28 degrees of freedom
## Multiple R-Squared: 0.8217,  Adjusted R-squared: 0.8026 
## 
## Convergence attained in 2 iterations with relative change 1.425963e-16

anova(m0,m2)

## Analysis of Variance Table
## 
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     30 278.32                              
## 2     28 200.82  2    77.506 5.4034 0.01036 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m1,m2)

## Analysis of Variance Table
## 
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     28 201.27                      
## 2     28 200.82  0   0.45183

Its performance is not better than using median as cut point.
Try to use mean as cut point:

summary(dat$wt)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.513   2.581   3.325   3.217   3.610   5.424

dat %<>% mutate(wt_mean = ifelse(wt<3.217,0,wt-3.217),
                index_mean = as.factor(ifelse(wt<3.217, "lighter", "heavier")))

ggplot(dat, aes(x = wt, y = mpg))+
  geom_point()+
  stat_smooth(aes(group = index_mean), method = "lm")+
  theme_bw()

summary(m3 <- segmented(m0, seg.Z = ~wt, psi = 3.217))

## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~wt, psi = 3.217)
## 
## Estimated Break-Point(s):
##    Est. St.Err 
##  3.592  0.347 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  42.5569     2.4217  17.573  < 2e-16 ***
## wt           -7.2877     0.8367  -8.710 1.85e-09 ***
## U1.wt         4.7892     1.5492   3.091       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.681 on 28 degrees of freedom
## Multiple R-Squared: 0.8213,  Adjusted R-squared: 0.8022 
## 
## Convergence attained in 7 iterations with relative change 1.422492e-16

anova(m3,m0)

## Analysis of Variance Table
## 
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     28 201.20                              
## 2     30 278.32 -2   -77.121 5.3663 0.01064 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m3,m1)

## Analysis of Variance Table
## 
## Model 1: mpg ~ wt + U1.wt + psi1.wt
## Model 2: mpg ~ wt + U1.wt + psi1.wt
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1     28 201.20                      
## 2     28 201.27  0 -0.066935

Using median as cut point is still better than other models.
Through the results, using median as cut point to conduct piecewise regression is the best model.

Q3

dat = read.csv("data/visual_search.csv", header = TRUE)

str(dat)

## 'data.frame':    132 obs. of  4 variables:
##  $ Participant: Factor w/ 33 levels "0042","0044",..: 19 16 18 25 28 22 24 23 26 27 ...
##  $ Dx         : Factor w/ 2 levels "Aphasic","Control": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Set.Size   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ RT         : num  786 936 751 1130 1211 ...

#check if any missing data exists
sum(sapply(dat, is.na))

## [1] 0

#The first Plot
ggplot(dat, aes(x = Set.Size, y = RT, group = Participant))+
  geom_point()+
  geom_line()+
  stat_smooth(aes(group = Dx), method = "lm", formula = y~x)+
  facet_wrap(~Dx)+
  theme_bw()+
  labs(x = "Set Size", y = "Response Time (ms)")

#Allow each participant has an individual intercept and slope:
summary(m1 <- lmer(RT~Set.Size*Dx+(Set.Size|Participant), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: RT ~ Set.Size * Dx + (Set.Size | Participant)
##    Data: dat
## 
## REML criterion at convergence: 2186.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7578 -0.3130 -0.0750  0.3117  6.1372 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev. Corr
##  Participant (Intercept) 657552.9 810.90       
##              Set.Size       407.4  20.18   1.00
##  Residual                772433.1 878.88       
## Number of obs: 132, groups:  Participant, 33
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)         2078.75     270.98   7.671
## Set.Size              73.49      11.40   6.446
## DxControl          -1106.05     366.90  -3.015
## Set.Size:DxControl   -21.74      15.44  -1.408
## 
## Correlation of Fixed Effects:
##             (Intr) Set.Sz DxCntr
## Set.Size    -0.071              
## DxControl   -0.739  0.053       
## St.Sz:DxCnt  0.053 -0.739 -0.071

#The second plot
ggplot(dat, aes(x = Set.Size, y = RT, shape = Dx, linetype = Dx))+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.data = mean_se, geom = "errorbar", linetype = "solid", width = 0.5)+
  scale_shape_manual(values = c(1,5,15,30))+
  labs(x = "Set Size", y = "Response Time (ms)", shape = "Group", linetype = "Group")+
  theme_bw()+
  theme(legend.position = c(0.18,0.8), 
        legend.background = element_rect(fill = "white", color = "black"))

#The third plot:pearson residuals plot
plot(m1, resid(.,type = "pearson")~fitted(.)|Dx,
     layout=c(2,1), abline = 0,
     xlab = "Fitted Values", ylab = "Pearson Residuals", pch = 20, cex = 0.8, aspect = 1.2)

Q5

dat = read.table("data/alcohol_use.txt", header = TRUE)

str(dat)

## 'data.frame':    246 obs. of  8 variables:
##  $ sid   : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ coa   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ sex   : int  0 0 0 1 1 1 1 1 1 1 ...
##  $ age14 : int  0 1 2 0 1 2 0 1 2 0 ...
##  $ alcuse: num  1.73 2 2 0 0 ...
##  $ peer  : num  1.265 1.265 1.265 0.894 0.894 ...
##  $ cpeer : num  0.247 0.247 0.247 -0.124 -0.124 ...
##  $ ccoa  : num  0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 0.549 ...

sum(sapply(dat, is.na))

## [1] 0

dat$coa %<>% as.factor
dat$sid %<>% as.factor

#Plot 
ggplot(dat, aes(x = peer, y = alcuse, shape = coa, linetype = coa))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
  scale_shape_discrete(name = "Child of a alcoholic parent",breaks =c("0","1"),labels = c("No", "Yes"))+
  scale_linetype_discrete(name = "Child of a alcoholic parent",breaks =c("0","1"),labels = c("No", "Yes"))+
  theme_bw()+
  theme(legend.position = c(0.3,0.8), 
        legend.background = element_rect(fill = "white", color = "black"))

#Analysis: different intercepts on individual level
summary(m1 <- lmer(alcuse~coa+peer*age14+(1|sid), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 | sid)
##    Data: dat
## 
## REML criterion at convergence: 621.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.30588 -0.66394 -0.05233  0.55906  2.60999 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sid      (Intercept) 0.3377   0.5811  
##  Residual             0.4823   0.6945  
## Number of obs: 246, groups:  sid, 82
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.31177    0.17225  -1.810
## coa1         0.56513    0.15878   3.559
## peer         0.69586    0.13219   5.264
## age14        0.42469    0.09346   4.544
## peer:age14  -0.15138    0.07481  -2.024
## 
## Correlation of Fixed Effects:
##            (Intr) coa1   peer   age14 
## coa1       -0.312                     
## peer       -0.725 -0.134              
## age14      -0.543  0.000  0.461       
## peer:age14  0.442  0.000 -0.566 -0.814

#different intercepts(individual) and slopes(age)
summary(m2 <- lmer(alcuse~coa+peer*age14+(1+age14|sid), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: alcuse ~ coa + peer * age14 + (1 + age14 | sid)
##    Data: dat
## 
## REML criterion at convergence: 603.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.59995 -0.40432 -0.07739  0.44372  2.27436 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  sid      (Intercept) 0.2595   0.5094        
##           age14       0.1469   0.3832   -0.05
##  Residual             0.3373   0.5808        
## Number of obs: 246, groups:  sid, 82
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.31382    0.14871  -2.110
## coa1         0.57120    0.14898   3.834
## peer         0.69518    0.11322   6.140
## age14        0.42469    0.10690   3.973
## peer:age14  -0.15138    0.08556  -1.769
## 
## Correlation of Fixed Effects:
##            (Intr) coa1   peer   age14 
## coa1       -0.339                     
## peer       -0.708 -0.146              
## age14      -0.408  0.000  0.350       
## peer:age14  0.332  0.000 -0.429 -0.814

#model selection 
anova(m1,m2)

## refitting model(s) with ML (instead of REML)

## Data: dat
## Models:
## m1: alcuse ~ coa + peer * age14 + (1 | sid)
## m2: alcuse ~ coa + peer * age14 + (1 + age14 | sid)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m1  7 620.47 645.01 -303.23   606.47                             
## m2  9 606.70 638.25 -294.35   588.70 17.765      2  0.0001388 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Based on AIC results, m2 would be better.
That is, in different age stage, the effect of the two factors might differ.

dat$age14 %<>% as.factor
#Child of normal parents
p1 = ggplot(dat[dat$coa=="0",], aes(x = peer, y = alcuse))+
  geom_point(alpha = .5)+
  stat_smooth(aes(group = age14, color = age14), method = "lm", size = .5)+
  ggtitle("Child of normal parents")+
  labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
  scale_color_discrete(name ="Age", labels = c("14","15","16"))+
  theme_bw()+
  theme(panel.grid = element_blank())
#Child of a alcoholic parent
p2 = ggplot(dat[dat$coa=="1",], aes(x = peer, y = alcuse))+
  geom_point(alpha = .5)+
  stat_smooth(aes(group = age14, color = age14), method = "lm", size = .5)+
  ggtitle("Child of a alcoholic parent")+
  labs(x = "Alcohol use among the teenager's peers", y = "Alcohol use of the teenager")+
  scale_color_discrete(name ="Age", labels = c("14","15","16"))+
  theme_bw()+
  theme(panel.grid = element_blank())

grid.arrange(p1,p2,nrow = 1, ncol = 2)

Q6

data("sleepstudy", package = "lme4")
dat = sleepstudy
str(dat)

## 'data.frame':    180 obs. of  3 variables:
##  $ Reaction: num  250 259 251 321 357 ...
##  $ Days    : num  0 1 2 3 4 5 6 7 8 9 ...
##  $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...

#Plot 
ggplot(dat, aes(x = Days, y = Reaction, group = Subject))+
  geom_point()+
  geom_line(size = 0.5)+
  labs(x = "Days of sleep deprivation", y = "Reaction Time (ms)")+
  theme_bw()

#Simple lm
summary(m0 <- lm(Reaction~Days, data = dat))

## 
## Call:
## lm(formula = Reaction ~ Days, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -110.848  -27.483    1.546   26.142  139.953 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  251.405      6.610  38.033  < 2e-16 ***
## Days          10.467      1.238   8.454 9.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.71 on 178 degrees of freedom
## Multiple R-squared:  0.2865, Adjusted R-squared:  0.2825 
## F-statistic: 71.46 on 1 and 178 DF,  p-value: 9.894e-15

plot(rstandard(m0), xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0)

#Analysis: different intercepts on individual level
summary(m1 <- lmer(Reaction~Days+(1|Subject), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 | Subject)
##    Data: dat
## 
## REML criterion at convergence: 1786.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2257 -0.5529  0.0109  0.5188  4.2506 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 1378.2   37.12   
##  Residual              960.5   30.99   
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 251.4051     9.7467   25.79
## Days         10.4673     0.8042   13.02
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.371

plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")

#different intercept and slope on individual level
summary(m2 <- lmer(Reaction~Days+(1+Days|Subject), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 + Days | Subject)
##    Data: dat
## 
## REML criterion at convergence: 1743.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9536 -0.4634  0.0231  0.4634  5.1793 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  Subject  (Intercept) 612.09   24.740       
##           Days         35.07    5.922   0.07
##  Residual             654.94   25.592       
## Number of obs: 180, groups:  Subject, 18
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  251.405      6.825   36.84
## Days          10.467      1.546    6.77
## 
## Correlation of Fixed Effects:
##      (Intr)
## Days -0.138

plot(m2, xlab = "Fitted values", ylab = "Standardized residuals")

#Model selection
anova(m1,m2)

## refitting model(s) with ML (instead of REML)

## Data: dat
## Models:
## m1: Reaction ~ Days + (1 | Subject)
## m2: Reaction ~ Days + (1 + Days | Subject)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m1  4 1802.1 1814.8 -897.04   1794.1                             
## m2  6 1763.9 1783.1 -875.97   1751.9 42.139      2  7.072e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Based on residuals plots and AIC results, m2 is better
That is, sleep deprivation has differet effect on different individuals.

Q7

dat = read.table("data/lecithin.txt", header = TRUE)

str(dat)

## 'data.frame':    235 obs. of  4 variables:
##  $ Group  : Factor w/ 2 levels "L","P": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Visit  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Recall : int  20 14 7 6 9 9 7 18 6 10 ...
##  $ Patient: Factor w/ 48 levels "0P2","L26","L27",..: 24 25 26 27 28 29 30 31 32 33 ...

sum(sapply(dat, is.na))

## [1] 0

#The first plot
ggplot(dat, aes(x = Visit, y = Recall, group = Patient))+
  geom_point()+
  geom_line()+
  facet_wrap(~Group)+
  labs(x = "Visit", y = "Recall score")+
  theme_bw()

#The second plot
ggplot(dat, aes(x = Visit, y = Recall, shape = Group, linetype = Group))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  stat_summary(fun.data = mean_se, geom = "errorbar", width = .1)+
  labs(x = "Visit", y = "Recall score")+
  theme_bw()+
  theme(legend.position = c(0.1,0.8), 
        legend.background = element_rect(fill = "white", color = "black"))

#simple lm
summary(m0 <-lm(Recall~Visit*Group, data = dat))

## 
## Call:
## lm(formula = Recall ~ Visit * Group, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2182  -2.9080   0.2909   3.3035  10.2909 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    5.8318     1.0087   5.781 2.39e-08 ***
## Visit          1.8773     0.3041   6.172 2.99e-09 ***
## GroupP         4.7482     1.3831   3.433 0.000707 ***
## Visit:GroupP  -2.6013     0.4170  -6.238 2.10e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.511 on 231 degrees of freedom
## Multiple R-squared:  0.2361, Adjusted R-squared:  0.2261 
## F-statistic: 23.79 on 3 and 231 DF,  p-value: 1.867e-13

plot(rstandard(m0), xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0)

#Different intercept per individual
summary(m1 <- lmer(Recall~Visit*Group+(1|Patient), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (1 | Patient)
##    Data: dat
## 
## REML criterion at convergence: 1140.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5563 -0.5452 -0.0379  0.5492  3.2297 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Patient  (Intercept) 16.524   4.065   
##  Residual              4.097   2.024   
## Number of obs: 235, groups:  Patient, 48
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)    5.8318     0.9777   5.965
## Visit          1.8773     0.1365  13.756
## GroupP         4.8325     1.3349   3.620
## Visit:GroupP  -2.5965     0.1879 -13.821
## 
## Correlation of Fixed Effects:
##             (Intr) Visit  GroupP
## Visit       -0.419              
## GroupP      -0.732  0.307       
## Visit:GropP  0.304 -0.726 -0.425

plot(m1, xlab = "Fitted values", ylab = "Standardized residuals")

#Different intercept and slope of visit per individual
summary(m2 <- lmer(Recall~Visit*Group+(1+Visit|Patient), data = dat))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Recall ~ Visit * Group + (1 + Visit | Patient)
##    Data: dat
## 
## REML criterion at convergence: 1127
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.66658 -0.42645  0.00929  0.42500  2.55927 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  Patient  (Intercept) 21.6432  4.6522        
##           Visit        0.4065  0.6376   -0.48
##  Residual              3.1159  1.7652        
## Number of obs: 235, groups:  Patient, 48
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)    5.8318     1.0675   5.463
## Visit          1.8773     0.1807  10.390
## GroupP         4.8319     1.4597   3.310
## Visit:GroupP  -2.5984     0.2482 -10.469
## 
## Correlation of Fixed Effects:
##             (Intr) Visit  GroupP
## Visit       -0.558              
## GroupP      -0.731  0.408       
## Visit:GropP  0.406 -0.728 -0.563

plot(m2, xlab = "Fitted values", ylab = "Standardized residuals")

anova(m1,m2)

## refitting model(s) with ML (instead of REML)

## Data: dat
## Models:
## m1: Recall ~ Visit * Group + (1 | Patient)
## m2: Recall ~ Visit * Group + (1 + Visit | Patient)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)   
## m1  6 1151.2 1171.9 -569.57   1139.2                            
## m2  8 1142.6 1170.3 -563.32   1126.6 12.501      2    0.00193 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

According to the results, the effects of time, treatment and interaction between the two folds might differ on individuals.
From the plots, the possible interpretation is that with time passing by, the difference between treatment groups became larger.

Exercises: Random coefficients models

Yu-Han, Luo

Q1

Q2

Q3

Q5

Q6

Q7