library(lme4)
## Loading required package: Matrix
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
head(sleepstudy)
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## 6 414.6901 5 308
str(sleepstudy)
## 'data.frame': 180 obs. of 3 variables:
## $ Reaction: num 250 259 251 321 357 ...
## $ Days : num 0 1 2 3 4 5 6 7 8 9 ...
## $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...
ggplot(sleepstudy, aes(x=Days, y=Reaction, group = Subject)) +
geom_line(aes(colour = Subject)) +
geom_point(aes(colour = Subject)) +
labs(title = "Thời gian phản ứng theo ngày thiếu ngủ cho từng đối tượng")
# Xây dựng Mô hình LMM
model_ri <- lmer(Reaction ~ Days + (1|Subject), data = sleepstudy)
summary(model_ri)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction ~ Days + (1 | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1786.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2257 -0.5529 0.0109 0.5188 4.2506
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 1378.2 37.12
## Residual 960.5 30.99
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.4051 9.7467 22.8102 25.79 <2e-16 ***
## Days 10.4673 0.8042 161.0000 13.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.371
model_rs <- lmer(Reaction ~ Days + (Days|Subject), data = sleepstudy)
summary(model_rs)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Reaction ~ Days + (Days | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1743.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9536 -0.4634 0.0231 0.4634 5.1793
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 612.10 24.741
## Days 35.07 5.922 0.07
## Residual 654.94 25.592
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 251.405 6.825 17.000 36.838 < 2e-16 ***
## Days 10.467 1.546 17.000 6.771 3.26e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.138
model_ri_ml <- lmer(Reaction ~ Days + (1|Days), data = sleepstudy, REML = FALSE)
## boundary (singular) fit: see help('isSingular')
model_rs_ml <- lmer(Reaction ~ Days + (Days|Subject), data = sleepstudy, REML = FALSE)
AIC(model_ri_ml, model_rs_ml)
## df AIC
## model_ri_ml 4 1908.293
## model_rs_ml 6 1763.939
BIC(model_ri_ml, model_rs_ml)
## df BIC
## model_ri_ml 4 1921.065
## model_rs_ml 6 1783.097
anova(model_ri_ml, model_rs_ml)
## Data: sleepstudy
## Models:
## model_ri_ml: Reaction ~ Days + (1 | Days)
## model_rs_ml: Reaction ~ Days + (Days | Subject)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## model_ri_ml 4 1908.3 1921.1 -950.15 1900.3
## model_rs_ml 6 1763.9 1783.1 -875.97 1751.9 148.35 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(naniar)
set.seed(123)
sleep_miss <- sleepstudy
sleep_miss$Reaction[sample(1:nrow(sleep_miss), 36)] <- NA
gg_miss_var(sleep_miss)
vis_miss(sleep_miss)
## Bước 2: Kiểm định thống kê Little’s MCAR Test
mcar_test(sleep_miss)
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 4.66 2 0.0973 2
sleep_miss <- sleep_miss %>%
mutate(Reaction_NA = is.na(Reaction))
t.test(Days ~ Reaction_NA, data = sleep_miss)
##
## Welch Two Sample t-test
##
## data: Days by Reaction_NA
## t = 0.12694, df = 52.815, p-value = 0.8995
## alternative hypothesis: true difference in means between group FALSE and group TRUE is not equal to 0
## 95 percent confidence interval:
## -1.027915 1.166803
## sample estimates:
## mean in group FALSE mean in group TRUE
## 4.513889 4.444444
qqnorm(resid(model_rs), main = "Q-Q Plot của Phần dư (Residuals)")
qqline(resid(model_rs), col = "red", lwd = 2)
## Biểu đồ Residuals vs. Fitted (Kiểm tra phương sai đồng nhất)
plot(model_rs)
library(performance)
library(see)
check_model(model_rs)