关于小鸡体重的数据:
d <- ChickWeight |> as_tibble()
glimpse(d)
## Rows: 578
## Columns: 4
## $ weight <dbl> 42, 51, 59, 64, 76, 93, 106, 125, 149, 171, 199, 205, 40, 49, 5…
## $ Time <dbl> 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 21, 0, 2, 4, 6, 8, 10, 1…
## $ Chick <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ Diet <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
lmer
包:频率学派的方法## 二次的增长函数
fit_lmer <- lmer(
weight ~ 1 + Time + I(Time^2) + Diet +
(1 + Time + I(Time^2) | Chick), # 随机效应项
data = ChickWeight)
# 直接summary
fit_lmer |> summary()
## Linear mixed model fit by REML ['lmerMod']
## Formula: weight ~ 1 + Time + I(Time^2) + Diet + (1 + Time + I(Time^2) |
## Chick)
## Data: ChickWeight
##
## REML criterion at convergence: 4241
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1293 -0.4610 -0.0002 0.4522 3.4185
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Chick (Intercept) 35.8978 5.9915
## Time 12.5458 3.5420 -0.96
## I(Time^2) 0.0515 0.2269 0.36 -0.61
## Residual 43.1002 6.5651
## Number of obs: 578, groups: Chick, 50
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 35.19599 1.26017 27.930
## Time 5.89251 0.52770 11.166
## I(Time^2) 0.11786 0.03332 3.538
## Diet2 2.42558 1.36974 1.771
## Diet3 3.54705 1.36974 2.590
## Diet4 5.42361 1.37019 3.958
##
## Correlation of Fixed Effects:
## (Intr) Time I(T^2) Diet2 Diet3
## Time -0.748
## I(Time^2) 0.321 -0.632
## Diet2 -0.365 -0.004 0.000
## Diet3 -0.365 -0.004 0.000 0.340
## Diet4 -0.365 -0.005 0.001 0.340 0.340
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0348439 (tol = 0.002, component 1)
# 输出估计出来的模型
# equatiomatic::extract_eq(fit_lmer, use_coefs = TRUE)
提取固定效应的系数
# broom方法
coef_lmer <- broom.mixed::tidy(fit_lmer) |>
mutate(across(is.numeric, ~round(., 2)))
coef_lmer |>
filter(effect == "fixed")
effect | group | term | estimate | std.error | statistic |
---|---|---|---|---|---|
fixed | NA | (Intercept) | 35.20 | 1.26 | 27.93 |
fixed | NA | Time | 5.89 | 0.53 | 11.17 |
fixed | NA | I(Time^2) | 0.12 | 0.03 | 3.54 |
fixed | NA | Diet2 | 2.43 | 1.37 | 1.77 |
fixed | NA | Diet3 | 3.55 | 1.37 | 2.59 |
fixed | NA | Diet4 | 5.42 | 1.37 | 3.96 |
提取随机效应的参数
coef_lmer |>
filter(effect == "ran_pars")
effect | group | term | estimate | std.error | statistic |
---|---|---|---|---|---|
ran_pars | Chick | sd__(Intercept) | 5.99 | NA | NA |
ran_pars | Chick | cor__(Intercept).Time | -0.96 | NA | NA |
ran_pars | Chick | cor__(Intercept).I(Time^2) | 0.36 | NA | NA |
ran_pars | Chick | sd__Time | 3.54 | NA | NA |
ran_pars | Chick | cor__Time.I(Time^2) | -0.61 | NA | NA |
ran_pars | Chick | sd__I(Time^2) | 0.23 | NA | NA |
ran_pars | Residual | sd__Observation | 6.57 | NA | NA |
各个组估计的adjusted系数(固定效应+随机效应)
coef_grp_lmer <- coef(fit_lmer)$Chick |>
rownames_to_column() |>
rename(grp = rowname)
各个组的随机效应及随机效应condval
和随机效应标准误condsd
(随机效应):
randef_lmer <- ranef(fit_lmer, condVar = TRUE) |>
as.data.frame() |> # 这步骤很重要
as_tibble()
glimpse(randef_lmer)
## Rows: 150
## Columns: 5
## $ grpvar <chr> "Chick", "Chick", "Chick", "Chick", "Chick", "Chick", "Chick",…
## $ term <fct> (Intercept), (Intercept), (Intercept), (Intercept), (Intercept…
## $ grp <fct> 18, 16, 15, 13, 9, 20, 10, 8, 17, 19, 4, 6, 11, 3, 1, 12, 2, 5…
## $ condval <dbl> 2.16109836, 10.48147280, 5.44893164, 8.53089245, 0.83675104, 7…
## $ condsd <dbl> 4.419325, 1.987085, 1.844111, 1.207257, 1.207257, 1.207257, 1.…
各个组的adjusted系数及其标准误:
计算方法参见:https://vasishth.github.io/Freq_CogSci/linear-mixed-models.html
# 固定效应及其标准误
temp <- coef_lmer |>
filter(effect == "fixed") |>
select(term, estimate, std.error)
# 合并到随机效应及其标准误
res <- randef_lmer |>
left_join(temp, by = join_by(term == term))
# 计算adjusted系数(固定效应+随机效应)及其标准误
res <- res |>
mutate(coef = condval + estimate,
coef_sd = sqrt(condsd^2 + std.error^2)) |>
select(term, grp, coef, coef_sd)
glimpse(res)
## Rows: 150
## Columns: 4
## $ term <chr> "(Intercept)", "(Intercept)", "(Intercept)", "(Intercept)", "(…
## $ grp <fct> 18, 16, 15, 13, 9, 20, 10, 8, 17, 19, 4, 6, 11, 3, 1, 12, 2, 5…
## $ coef <dbl> 37.36110, 45.68147, 40.64893, 43.73089, 36.03675, 42.47255, 40…
## $ coef_sd <dbl> 4.595436, 2.352893, 2.233461, 1.745013, 1.745013, 1.745013, 1.…
pred_lmer <- predictions(fit_lmer,
newdata = datagrid(Chick = ChickWeight$Chick,
Time = 0:21))
pred_lmer |>
ggplot(aes(Time, estimate, level = Chick)) +
geom_line() +
labs(y = "Predicted weight", x = "Time", title = "Quadratic growth model")
pred_lmer <- predictions(
fit_lmer,
newdata = datagrid(Chick = NA,
Diet = 1:4,
Time = 0:21),
re.form = NA)
ggplot(pred_lmer, aes(x = Time, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_ribbon(alpha = .1, fill = "red") +
geom_line() +
facet_wrap(~ Diet, labeller = label_both) +
labs(title = "Population-level trajectories")
brms
包:贝叶斯的方法fit_brms <- brm(
weight ~ 1 + Time + I(Time^2) + Diet +
(1 + Time + I(Time^2) | Chick), # 随机效应项
data = d,
file = "fit_brms")
summary(fit_brms)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: weight ~ 1 + Time + I(Time^2) + Diet + (1 + Time + I(Time^2) | Chick)
## Data: d (Number of observations: 578)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~Chick (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 5.91 1.00 4.12 8.08 1.00 1910
## sd(Time) 3.52 0.41 2.83 4.45 1.00 1062
## sd(ITimeE2) 0.23 0.03 0.19 0.29 1.00 1826
## cor(Intercept,Time) -0.93 0.04 -0.99 -0.82 1.01 840
## cor(Intercept,ITimeE2) 0.29 0.17 -0.07 0.59 1.01 663
## cor(Time,ITimeE2) -0.57 0.10 -0.74 -0.36 1.00 1550
## Tail_ESS
## sd(Intercept) 2749
## sd(Time) 1769
## sd(ITimeE2) 2246
## cor(Intercept,Time) 1701
## cor(Intercept,ITimeE2) 1466
## cor(Time,ITimeE2) 2473
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 35.27 1.35 32.67 37.99 1.00 1311 2320
## Time 5.86 0.53 4.79 6.88 1.01 908 1329
## ITimeE2 0.12 0.03 0.05 0.19 1.00 1193 1960
## Diet2 2.47 1.61 -0.61 6.04 1.01 506 117
## Diet3 3.58 1.65 0.35 6.68 1.00 942 2575
## Diet4 5.28 1.56 2.12 8.31 1.00 4015 3075
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 6.60 0.22 6.18 7.06 1.00 3201 2347
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
提取固定效应的系数
# broom方法
coef_brms <- broom.mixed::tidy(fit_brms) |>
mutate(across(is.numeric, ~round(., 2)))
coef_brms |>
filter(effect == "fixed")
effect | component | group | term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|---|---|---|
fixed | cond | NA | (Intercept) | 35.27 | 1.35 | 32.67 | 37.99 |
fixed | cond | NA | Time | 5.86 | 0.53 | 4.79 | 6.88 |
fixed | cond | NA | ITimeE2 | 0.12 | 0.03 | 0.05 | 0.19 |
fixed | cond | NA | Diet2 | 2.47 | 1.61 | -0.61 | 6.04 |
fixed | cond | NA | Diet3 | 3.58 | 1.65 | 0.35 | 6.68 |
fixed | cond | NA | Diet4 | 5.28 | 1.56 | 2.12 | 8.31 |
提取随机效应的参数
coef_brms |>
filter(effect == "ran_pars")
effect | component | group | term | estimate | std.error | conf.low | conf.high |
---|---|---|---|---|---|---|---|
ran_pars | cond | Chick | sd__(Intercept) | 5.91 | 1.00 | 4.12 | 8.08 |
ran_pars | cond | Chick | sd__Time | 3.52 | 0.41 | 2.83 | 4.45 |
ran_pars | cond | Chick | sd__ITimeE2 | 0.23 | 0.03 | 0.19 | 0.29 |
ran_pars | cond | Chick | cor__(Intercept).Time | -0.93 | 0.04 | -0.99 | -0.82 |
ran_pars | cond | Chick | cor__(Intercept).ITimeE2 | 0.29 | 0.17 | -0.07 | 0.59 |
ran_pars | cond | Chick | cor__Time.ITimeE2 | -0.57 | 0.10 | -0.74 | -0.36 |
ran_pars | cond | Residual | sd__Observation | 6.60 | 0.22 | 6.18 | 7.06 |
# 用spread draws之前先查看一下抽样的系数
get_variables(fit_brms)[1:20]
## [1] "b_Intercept" "b_Time"
## [3] "b_ITimeE2" "b_Diet2"
## [5] "b_Diet3" "b_Diet4"
## [7] "sd_Chick__Intercept" "sd_Chick__Time"
## [9] "sd_Chick__ITimeE2" "cor_Chick__Intercept__Time"
## [11] "cor_Chick__Intercept__ITimeE2" "cor_Chick__Time__ITimeE2"
## [13] "sigma" "r_Chick[18,Intercept]"
## [15] "r_Chick[16,Intercept]" "r_Chick[15,Intercept]"
## [17] "r_Chick[13,Intercept]" "r_Chick[9,Intercept]"
## [19] "r_Chick[20,Intercept]" "r_Chick[10,Intercept]"
各个组的adjusted系数:
grp_coef <- fit_brms |>
spread_draws(b_Intercept, # 截距
b_Time,
b_ITimeE2,
r_Chick[grp, term]) # 随机效应
# grp_coef
# b_Intercept : 固定效应截距
# b_ITimeE2和b_Time: 固定效应系数
# term : 随机效应的项目
# Chick_id: 小鸡id
# spread_draws会根据chain和draw来排布样本
grp_coef_b_Intercept <-
grp_coef |>
mutate(grp_b_Intercept = b_Intercept + r_Chick) |>
filter(term == "Intercept") |>
select(1:3, 7, 10) |>
mean_qi()
## Adding missing grouping variables: `term`
grp_coef_b_Time <-
grp_coef |>
mutate(grp_b_Time = b_Time + r_Chick) |>
filter(term == "Time") |>
select(1:3, 7, 10) |>
mean_qi()
## Adding missing grouping variables: `term`
grp_coef_b_Time2 <-
grp_coef |>
mutate(grp_b_Time2 = b_ITimeE2 + r_Chick) |>
filter(term == "ITimeE2") |>
select(1:3, 7, 10) |>
mean_qi()
## Adding missing grouping variables: `term`
pred_brms <- predictions(fit_brms,
newdata = datagrid(Chick = ChickWeight$Chick,
Time = 0:21))
pred_brms |>
ggplot(aes(Time, estimate, level = Chick)) +
geom_line() +
labs(y = "Predicted weight", x = "Time", title = "Quadratic growth model")
pred_brms <- predictions(
fit_brms,
newdata = datagrid(Chick = NA,
Diet = 1:4,
Time = 0:21),
re_formula = NA) |>
posterior_draws()
ggplot(pred_brms, aes(x = Time, y = draw)) +
stat_lineribbon() +
facet_wrap(~ Diet, labeller = label_both) +
labs(title = "Population-level trajectories") +
scale_fill_simpsons()
# lme4的结果
grp_lmer <- res |>
mutate(term = case_when(
term == "(Intercept)" ~ "Intercept",
term == "Time" ~ "Time",
term == "I(Time^2)" ~ "ITimeE2"
),
grp = as.integer(as.character(grp)),
.lower = coef - 2*coef_sd,
.upper = coef + 2*coef_sd) |>
select(term, grp, .estimate = coef, .lower, .upper) |>
mutate(pkg = "lme4") |>
arrange(desc(term), grp)
head(grp_lmer)
term | grp | .estimate | .lower | .upper | pkg |
---|---|---|---|---|---|
Time | 1 | 2.621986 | 0.7128218 | 4.531151 | lme4 |
Time | 2 | 4.717383 | 2.8082189 | 6.626548 | lme4 |
Time | 3 | 4.649811 | 2.7406468 | 6.558976 | lme4 |
Time | 4 | 4.080015 | 2.1708508 | 5.989180 | lme4 |
Time | 5 | 5.934773 | 4.0256086 | 7.843938 | lme4 |
Time | 6 | 11.165711 | 9.2565468 | 13.074876 | lme4 |
# brms的结果
grp_brms <- bind_rows(
grp_coef_b_Intercept |> rename(.estimate = grp_b_Intercept),
grp_coef_b_Time |> rename(.estimate = grp_b_Time),
grp_coef_b_Time2 |> rename(.estimate = grp_b_Time2))
grp_brms <- grp_brms |>
select(term, grp, .estimate, .lower, .upper) |>
mutate(pkg = "brms")
head(grp_brms)
term | grp | .estimate | .lower | .upper | pkg |
---|---|---|---|---|---|
Intercept | 1 | 40.23462 | 36.31597 | 44.49116 | brms |
Intercept | 2 | 36.96176 | 32.95113 | 40.88590 | brms |
Intercept | 3 | 36.90182 | 33.05321 | 40.87062 | brms |
Intercept | 4 | 39.01910 | 35.06788 | 42.97679 | brms |
Intercept | 5 | 34.21601 | 29.87568 | 38.10023 | brms |
Intercept | 6 | 28.41645 | 23.69417 | 32.81245 | brms |
汇总并作图:
可以看到,两种方法得到的图像和置信区间基本一样
grp_combine <- bind_rows(grp_lmer, grp_brms)
grp_combine |>
ggplot() +
geom_pointrange(aes(x = .estimate, y = factor(grp), xmin = .lower, xmax = .upper, color = pkg),
alpha = .8,
position = position_dodge(width = 0.8)) +
facet_wrap(~term, scales = "free_x") +
labs(y = NULL) +
scale_color_simpsons()