Load package
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)
# download a copy of data from the following link and save it to the
# data folder
# https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta
construct indicator variable for after menarche
dta <- dta %>%
mutate(ID = factor(ID),
Time_a = ifelse(Time > 0, Time, 0),#if time>0 is true return time value, if false return 0
Menarche = as.factor(ifelse(Time > 0, "T", "F")))
# show data structure
str(dta)
## 'data.frame': 1049 obs. of 7 variables:
## $ ID : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Age : num 9.32 10.33 11.24 12.19 13.24 ...
## $ Age_m : num 13.2 13.2 13.2 13.2 13.2 ...
## $ Time : num -3.87 -2.86 -1.95 -1 0.05 ...
## $ PBF : num 7.94 15.65 13.51 23.23 10.52 ...
## $ Time_a : num 0 0 0 0 0.05 ...
## $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "23 Mar 2011 15:46"
## - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int [1:5] 254 254 254 254 254
## - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "version")= int 12
# black and white theme
theme_set(theme_minimal())
plot-before MC
ggplot(subset(dta, Menarche=="F"),
aes(Time, PBF, group=ID)) +
#geom_point(col=8)+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")

model mb_0-before MC
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta, Menarche=='F'))
sjPlot::tab_model(mb_0)
|
PBF
|
Predictors
|
Estimates
|
CI
|
p
|
(Intercept)
|
20.20
|
19.06 – 21.34
|
<0.001
|
Time
|
-0.09
|
-0.33 – 0.16
|
0.494
|
Random Effects
|
σ2
|
10.02
|
τ00 ID
|
43.81
|
ICC
|
0.81
|
N ID
|
162
|
Observations
|
497
|
Marginal R2 / Conditional R2
|
0.000 / 0.814
|
Plot-After MC
ggplot(subset(dta, Menarche=="T"),
aes(Time, PBF, group=ID)) +
# geom_point()+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")

model ma_0-After MC
ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta, Menarche=='T'))
sjPlot::tab_model(ma_0)
|
PBF
|
Predictors
|
Estimates
|
CI
|
p
|
(Intercept)
|
22.59
|
21.42 – 23.75
|
<0.001
|
Time
|
1.97
|
1.68 – 2.25
|
<0.001
|
Random Effects
|
σ2
|
7.82
|
τ00 ID
|
46.21
|
τ11 ID.Time
|
1.42
|
ρ01 ID
|
-0.55
|
ICC
|
0.82
|
N ID
|
160
|
Observations
|
552
|
Marginal R2 / Conditional R2
|
0.123 / 0.844
|
Confidence Ellipses
betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4

full model
m4 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data=dta)
dta_full <- dta %>%
mutate(yhat = fitted(m4))
# individual connected lines different color for before and after
# add group regression lines
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
geom_line(aes(group = ID)) +
#geom_point(alpha = 0.5) +
#stat_smooth(method="lm", aes(group = Menarche)) +
guides(color = F) +
labs(x = "Age relative to menarche (in years)",
y = "Percent body fat")

model with 3 random effects associated with an individual
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
## Data: dta
##
## REML criterion at convergence: 6062.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7742 -0.5900 -0.0359 0.5946 3.3798
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 45.942 6.778
## Time 1.631 1.277 0.29
## Time_a 2.750 1.658 -0.54 -0.83
## Residual 9.473 3.078
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.3614 0.5646 37.837
## Time 0.4171 0.1572 2.654
## Time_a 2.0471 0.2280 8.980
##
## Correlation of Fixed Effects:
## (Intr) Time
## Time 0.351
## Time_a -0.515 -0.872
baseline model
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + (Time | ID)
## Data: dta
##
## REML criterion at convergence: 6196.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7577 -0.5961 0.0221 0.6169 3.0981
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 38.2178 6.1821
## Time 0.6938 0.8329 -0.24
## Residual 11.6215 3.4090
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 23.04978 0.49985 46.11
## Time 1.55480 0.08484 18.33
##
## Correlation of Fixed Effects:
## (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
estimate random effects by simulation
m1_re <- REsim(m1)
plot for random effects
plotREsim(m1_re)

residual plot
plot(m1, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")

The end