# load them
pacman::p_load(mlmRev, tidyverse, lme4, foreign, merTools, sjPlot)
dta <- read.dta("C:/Users/HANK/Desktop/HOMEWORK/fat.dta")
names(dta) <-c("ID", "Age", "Age_m", "Time", "PBF")
# show first 6 obs.
head(dta)
## ID Age Age_m Time PBF
## 1 1 9.32 13.19 -3.87 7.94
## 2 1 10.33 13.19 -2.86 15.65
## 3 1 11.24 13.19 -1.95 13.51
## 4 1 12.19 13.19 -1.00 23.23
## 5 1 13.24 13.19 0.05 10.52
## 6 1 14.24 13.19 1.05 20.45
dta <- dta %>%
mutate(ID = factor(ID),
Time_a = ifelse(Time > 0, Time, 0),
Menarche = as.factor(ifelse(Time > 0, "T", "F")))
此步驟為“初經之後”再建構一個新的變數 –> Time_a,意思是以初經發生時的年齡為“0”,
以現在的年齡與之相減,若還未發生初經則記為“0”
str(dta)
## 'data.frame': 1049 obs. of 7 variables:
## $ ID : Factor w/ 162 levels "1","2","3","4",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ Age : num 9.32 10.33 11.24 12.19 13.24 ...
## $ Age_m : num 13.2 13.2 13.2 13.2 13.2 ...
## $ Time : num -3.87 -2.86 -1.95 -1 0.05 ...
## $ PBF : num 7.94 15.65 13.51 23.23 10.52 ...
## $ Time_a : num 0 0 0 0 0.05 ...
## $ Menarche: Factor w/ 2 levels "F","T": 1 1 1 1 2 2 1 1 1 1 ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr "23 Mar 2011 15:46"
## - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int [1:5] 254 254 254 254 254
## - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
## - attr(*, "version")= int 12
顯示新增變數之後的data
# black and white theme
theme_set(theme_minimal())
ggplot(subset(dta, Menarche=="F"),
aes(Time, PBF, group=ID)) + geom_point(col=8)+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")
使用subset()函數,進行欄位條件搜尋。
以本區塊為例,目的在取出“dta”的“Menarche”欄位的資料–“F”,
意思是“未發生”初經者的資料。
隨後針對此欄資料繪圖(將各點連線),並為之加入一條迴歸線。
資料顯示:尚未發生初經之前“體脂肪”與“年齡”呈現近乎水平無相關,
意即初經尚未發生,年齡的提升與體脂肪沒有太大關係。
# baseline model if you want one
mb_0 <- lmer(PBF ~ Time + (1 | ID), data = subset(dta, Menarche=='F'))
tab_model(mb_0)
| PBF | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 20.20 | 19.06 – 21.34 | <0.001 |
| Time | -0.09 | -0.33 – 0.16 | 0.494 |
| Random Effects | |||
| σ2 | 10.02 | ||
| τ00 ID | 43.81 | ||
| ICC | 0.81 | ||
| N ID | 162 | ||
| Observations | 497 | ||
| Marginal R2 / Conditional R2 | 0.000 / 0.814 | ||
ggplot(subset(dta, Menarche=="T"),
aes(Time, PBF, group=ID)) +
# geom_point()+
geom_line(col=8)+
stat_smooth(aes(group=1),method='lm',formula=y~x)+
labs(x="Age (in year)",
y="% Body fat")
本區塊類似上一步驟
使用subset()函數,進行欄位條件搜尋。
本區塊為取出“dta”的“Menarche”欄位的資料–“T”,
意思是“已發生”初經者的資料。
隨後針對此欄資料繪圖(將各點連線),並為之加入一條迴歸線。
資料顯示:發生初經之後“體脂肪”與“年齡”呈現弱正相關,
意即初經發生後,體脂肪會隨著年齡的提升而緩緩增加。
ma_0 <- lmer(PBF ~ Time + (Time | ID), data=subset(dta, Menarche=='T'))
tab_model(ma_0)
| PBF | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 22.59 | 21.42 – 23.75 | <0.001 |
| Time | 1.97 | 1.68 – 2.25 | <0.001 |
| Random Effects | |||
| σ2 | 7.82 | ||
| τ00 ID | 46.21 | ||
| τ11 ID.Time | 1.42 | ||
| ρ01 ID | -0.55 | ||
| ICC | 0.82 | ||
| N ID | 160 | ||
| Observations | 552 | ||
| Marginal R2 / Conditional R2 | 0.123 / 0.844 | ||
betas_a <- na.omit(coef(lmList(PBF ~ Time | ID, data=subset(dta, Menarche=='T'))))
car::dataEllipse(betas_a[,1],betas_a[,2])
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
區塊中,運用na.omit函數刪除數據資料中所有不完整情況。
運用car::dataEllipse函數繪製橢圓圖。
(目前只認識到此部分,是否有同學願意教導呢?)
m4 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data=dta)
dta_full <- dta %>%
mutate(yhat = fitted(m4))
ggplot(dta_full, aes(Time, yhat, color = Menarche)) +
geom_line(aes(group = ID)) + geom_point(alpha = 0.5) + stat_smooth(method="lm", aes(group = Menarche)) +
guides(color = F) +
labs(x = "Age relative to menarche (in years)",
y = "Percent body fat")
## `geom_smooth()` using formula 'y ~ x'
把前面步驟所繪製,“發生初經之前”和“發生初經之後”的線連接,
並且以不同顏色作區分,“未發生”是紅色;“已發生”是藍色
(“F”是紅色;“T”是藍色) A
添加組迴歸線。
資料顯示:在Age0.0之後仍有紅色線,檢視原始資料後發現,這並非做錯,而是這些在“0.0”
之後的紅線指的是“尚未發生初經”,原始資料中顯示一直都是“F”,故該條線即便已經過
了“0.0”之後,依然是紅色的原因。
summary(m1 <- lmer(PBF ~ Time + Time_a + (Time + Time_a | ID), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + Time_a + (Time + Time_a | ID)
## Data: dta
##
## REML criterion at convergence: 6062.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7742 -0.5901 -0.0359 0.5947 3.3798
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 45.940 6.778
## Time 1.631 1.277 0.29
## Time_a 2.749 1.658 -0.54 -0.83
## Residual 9.473 3.078
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.3614 0.5645 37.838
## Time 0.4171 0.1572 2.654
## Time_a 2.0471 0.2280 8.980
##
## Correlation of Fixed Effects:
## (Intr) Time
## Time 0.351
## Time_a -0.515 -0.872
# baseline model if you want one
summary(m0 <- lmer(PBF ~ Time + (Time | ID), data=dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: PBF ~ Time + (Time | ID)
## Data: dta
##
## REML criterion at convergence: 6196.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7577 -0.5961 0.0221 0.6169 3.0981
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 38.2178 6.1821
## Time 0.6938 0.8329 -0.24
## Residual 11.6215 3.4090
## Number of obs: 1049, groups: ID, 162
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 23.04978 0.49985 46.11
## Time 1.55480 0.08484 18.33
##
## Correlation of Fixed Effects:
## (Intr)
## Time -0.199
## convergence code: 0
## Model failed to converge with max|grad| = 0.00354498 (tol = 0.002, component 1)
m1_re <- REsim(m1)
head(m1_re)
## groupFctr groupID term mean median sd
## 1 ID 1 (Intercept) -4.644296 -4.848563 2.293899
## 2 ID 2 (Intercept) -2.290968 -2.095994 2.238481
## 3 ID 3 (Intercept) 2.478576 2.422190 1.979402
## 4 ID 4 (Intercept) -9.791658 -9.760906 2.267116
## 5 ID 5 (Intercept) -10.683183 -10.676090 2.138363
## 6 ID 6 (Intercept) 10.640994 10.508591 2.262929
plotREsim(m1_re)
plot(m1, resid(., type = "pearson") ~ fitted(.),
abline = 0, pch = 20, cex = .8, id = 0.05,
xlab = "Ftted values", ylab = "Pearson Residuals")
從“殘差圖”可以直觀看出所描繪的點都在以0為橫軸的直線上下隨機散佈,
因此迴歸直線對各個觀測值的擬合情況是良好的。
意即變數X與y之間有顯著的線性相關關係。
整體解釋:發生初經之後“體脂肪”與“年齡”呈正相關,也就是說在初經發生後,體脂肪
會隨著年齡的提升而緩慢增加。而初經發生前體脂肪與年齡並無太明顯的相關。 體脂肪的改變有分段的差別(初經前後的分別)
```