# load them
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)

# download a copy of data from the following link and save it to the
# data folder
# https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta

# input data
dta <- read.dta("https://content.sph.harvard.edu/fitzmaur/ala2e/fat.dta")

# 將變數重新命名
names(dta) <-c("ID", "Age", "Age_m", "Time", "PBF")

# show first 6 obs.
head(dta)

##   ID   Age Age_m  Time   PBF
## 1  1  9.32 13.19 -3.87  7.94
## 2  1 10.33 13.19 -2.86 15.65
## 3  1 11.24 13.19 -1.95 13.51
## 4  1 12.19 13.19 -1.00 23.23
## 5  1 13.24 13.19  0.05 10.52
## 6  1 14.24 13.19  1.05 20.45

# construct indicator variable for after menarche
dta <- dta %>% 
       mutate(ID = factor(ID), #ID是類別數據，轉換為factor            
              Time_a = ifelse(Time > 0, Time, 0), #新增月經來的時間變數Time_a
              Menarche = as.factor(ifelse(Time > 0, "T", "F"))) #新增月經有沒有來的變數Menarche，用as.factor轉換為factor

# show data structure
str(dta)

## 'data.frame':    1049 obs. of  7 variables:
##  $ ID      : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ Age     : num  9.32 10.33 11.24 12.19 13.24 ...
##  $ Age_m   : num  13.2 13.2 13.2 13.2 13.2 ...
##  $ Time    : num  -3.87 -2.86 -1.95 -1 0.05 ...
##  $ PBF     : num  7.94 15.65 13.51 23.23 10.52 ...
##  $ Time_a  : num  0 0 0 0 0.05 ...
##  $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "23 Mar 2011 15:46"
##  - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int [1:5] 254 254 254 254 254
##  - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "version")= int 12

# black and white theme                     
theme_set(theme_minimal())

1 Visdualization

#依據每個ID，劃出月經來潮前，年紀和體脂率的關係
ggplot(subset(dta, Menarche=="F"), #Menarche=="F"月經還沒來
       aes(Time, PBF, group=ID)) + 
 #geom_point(col=8)+
 geom_line(col=8)+
 stat_smooth(aes(group=1),method='lm',formula=y~x)+
 labs(x="Age (in year)",
      y="% Body fat")

# baseline model if you want one
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta, Menarche=='F')) #(1 | ID)是隨機截距項
tab_model(mb_0)

	PBF
Predictors	Estimates	CI	p
(Intercept)	20.20	19.06 – 21.34	<0.001
Time	-0.09	-0.33 – 0.16	0.494
Random Effects
σ²	10.02
τ₀₀ _ID	43.81
ICC	0.81
N _ID	162
Observations	497
Marginal R² / Conditional R²	0.000 / 0.814

mb_0結果解釋：

1.在月經來之前，全部受試者平均體脂率為20.20，信賴區間為19.06 – 21.34。

2.月經來之前，Time與PBF為負向關係。

ggplot(subset(dta, Menarche=="T"), #Menarche=="T"月經來之後
       aes(Time, PBF, group=ID)) +
# geom_point()+
 geom_line(col=8)+
 stat_smooth(aes(group=1),method='lm',formula=y~x)+
 labs(x="Age (in year)",
      y="% Body fat")

ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta, Menarche=='T'))
tab_model(ma_0)

	PBF
Predictors	Estimates	CI	p
(Intercept)	22.59	21.42 – 23.75	<0.001
Time	1.97	1.68 – 2.25	<0.001
Random Effects
σ²	7.82
τ₀₀ _ID	46.21
τ₁₁ _ID.Time	1.42
ρ₀₁ _ID	-0.55
ICC	0.82
N _ID	160
Observations	552
Marginal R² / Conditional R²	0.123 / 0.844

解釋： 1.月經來之後，受試者平均體脂率為22.59，信賴區間為21.42 – 23.75

2.月經來之後，Time和PBF為正向關係，Time增加一單位，PBF增加1.97%

#X軸為每個受試者截距，Y軸為斜率，看看兩者之間的pattern
betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])

## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

m4 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data=dta) 

dta_full <- dta %>% 
  mutate(yhat = fitted(m4))

# individual connected lines different color for before and after
# add group regression lines
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
 geom_line(aes(group = ID)) +
 #geom_point(alpha = 0.5) +
 #stat_smooth(method="lm", aes(group = Menarche)) +
 guides(color = F) +
 labs(x = "Age relative to menarche (in years)", 
      y = "Percent body fat")

# model with 3 random effects associated with an individual
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
##    Data: dta
## 
## REML criterion at convergence: 6062.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7742 -0.5901 -0.0359  0.5947  3.3798 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  ID       (Intercept) 45.940   6.778               
##           Time         1.631   1.277     0.29      
##           Time_a       2.749   1.658    -0.54 -0.83
##  Residual              9.473   3.078               
## Number of obs: 1049, groups:  ID, 162
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  21.3614     0.5645  37.838
## Time          0.4171     0.1572   2.654
## Time_a        2.0471     0.2280   8.980
## 
## Correlation of Fixed Effects:
##        (Intr) Time  
## Time    0.351       
## Time_a -0.515 -0.872

# baseline model if you want one
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta))

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + (Time | ID)
##    Data: dta
## 
## REML criterion at convergence: 6196.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7577 -0.5961  0.0221  0.6169  3.0981 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  ID       (Intercept) 38.2178  6.1821        
##           Time         0.6938  0.8329   -0.24
##  Residual             11.6215  3.4090        
## Number of obs: 1049, groups:  ID, 162
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 23.04978    0.49985   46.11
## Time         1.55480    0.08484   18.33
## 
## Correlation of Fixed Effects:
##      (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)

# estimate random effects by simulation
m1_re <- REsim(m1)

# plot for random effects
plotREsim(m1_re)

# residual plot
plot(m1, resid(., type = "pearson") ~ fitted(.),
     abline = 0, pch = 20, cex = .8, id = 0.05,
     xlab = "Ftted values", ylab = "Pearson Residuals")

2 The end

```

Week6 In-class exercises 3

Ching-Fang Wu

Mon Oct 19 13:18:09 2020

1 Visdualization

2 The end