pacman::p_load(foreign, tidyverse, lme4, merTools)
# file location
fLoc <- "C:/Users/HANK/Desktop/HOMEWORK/eg_hlm.sav"
# read spss format and covert it to data frame
dta <- read.spss(fLoc) %>% as.data.frame
# check data structure
str(dta)
## 'data.frame': 7230 obs. of 12 variables:
## $ size : num 380 380 380 380 380 380 380 380 380 380 ...
## $ lowinc : num 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
## $ mobility: num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
## $ female : num 0 0 0 0 0 0 0 0 0 0 ...
## $ black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ hispanic: num 1 1 1 0 0 0 0 0 1 1 ...
## $ year : num 0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
## $ grade : num 2 3 4 0 1 2 3 4 0 1 ...
## $ math : num 1.146 1.134 2.3 -1.303 0.439 ...
## $ retained: num 0 0 0 0 0 0 0 0 0 0 ...
## $ school : Factor w/ 60 levels " 2020",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ cid : Factor w/ 1721 levels " 101480302",..: 244 244 244 248 248 248 248 248 253 253 ...
# set black-and-white theme
theme_set(theme_bw())
ggplot(dta, aes(year, math, group = cid)) +
geom_point(alpha = .1) +
geom_line(alpha = .1) +
stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
labs(x = "Year of age (centered)", y = "Math score")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# nested specification with random intercepts only
# fixed-effect for slope
summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
## Data: dta
##
## REML criterion at convergence: 16759.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8093 -0.5831 -0.0270 0.5566 6.0867
##
## Random effects:
## Groups Name Variance Std.Dev.
## cid:school (Intercept) 0.6699 0.8185
## school (Intercept) 0.1869 0.4324
## Residual 0.3470 0.5891
## Number of obs: 7230, groups: cid:school, 1721; school, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.780482 0.061088 -12.78
## year 0.746123 0.005396 138.26
##
## Correlation of Fixed Effects:
## (Intr)
## year -0.031
fixed-effect的斜率
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: math ~ (year | school/cid)
## Data: dta
##
## REML criterion at convergence: 16553.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2231 -0.5611 -0.0308 0.5312 5.1579
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## cid:school (Intercept) 0.64128 0.8008
## year 0.01132 0.1064 0.55
## school (Intercept) 0.92678 0.9627
## year 0.60804 0.7798 0.92
## Residual 0.30114 0.5488
## Number of obs: 7230, groups: cid:school, 1721; school, 60
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -1.64549 0.05462 59.88455 -30.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
random effect的斜率
# compare models
anova(m1, m2)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 5 16757 16792 -8373.5 16747
## m2 8 16566 16621 -8275.0 16550 197.1 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# merTools
# estimate random-effects parameters by simulation
m2_re <- REsim(m2)
plotREsim(m2_re)
#m1
plot(m1, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
#m2
plot(m2, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)